Compare commits
866 Commits
v0.0.11
...
async-reba
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9cd7c82e77 | ||
|
|
43161c816e | ||
|
|
6644c06af1 | ||
|
|
ed47212e07 | ||
|
|
db9cb74364 | ||
|
|
f64902eb25 | ||
|
|
e115a274d6 | ||
|
|
00239c2fd4 | ||
|
|
c0f9ad19fe | ||
|
|
46ac76701e | ||
|
|
1f77863aef | ||
|
|
d7555609fd | ||
|
|
7fe118ce63 | ||
|
|
44a349386c | ||
|
|
97cba92fa5 | ||
|
|
d9b16d4f73 | ||
|
|
50b6580fbb | ||
|
|
e7548f9494 | ||
|
|
830d2df671 | ||
|
|
13b50a07db | ||
|
|
4501dca133 | ||
|
|
2c8e566507 | ||
|
|
6e8a202107 | ||
|
|
2a05cd35b0 | ||
|
|
55a70cde8f | ||
|
|
706c00d897 | ||
|
|
d323ea9e95 | ||
|
|
b8ece84c6e | ||
|
|
a018112a13 | ||
|
|
d3a477902b | ||
|
|
298b151486 | ||
|
|
6a6ea251ae | ||
|
|
c7c709a0a7 | ||
|
|
6ac57b4854 | ||
|
|
f5e0b946c7 | ||
|
|
b1818cc370 | ||
|
|
d05717a1bd | ||
|
|
d11daee31a | ||
|
|
73da8c1910 | ||
|
|
f06aa300d0 | ||
|
|
c4e94e280e | ||
|
|
8f2941c575 | ||
|
|
447baad5c3 | ||
|
|
2703813e8a | ||
|
|
521e152150 | ||
|
|
3d43ad0f4d | ||
|
|
3621fceae2 | ||
|
|
e123f33c03 | ||
|
|
b8713666c2 | ||
|
|
cf0ab85e2c | ||
|
|
8502c7c801 | ||
|
|
e89814dc6b | ||
|
|
9461bacf0d | ||
|
|
e276dcbab7 | ||
|
|
1a3de0e819 | ||
|
|
ee3786fe15 | ||
|
|
31b5667cee | ||
|
|
a483f1a083 | ||
|
|
2ecec1c9f8 | ||
|
|
08ac311971 | ||
|
|
cb49b6a0d6 | ||
|
|
016da177db | ||
|
|
ec5998bc36 | ||
|
|
b1e17ee347 | ||
|
|
b6e1d6e6ae | ||
|
|
fa609f1afc | ||
|
|
470b5eafe7 | ||
|
|
2e5b0c1d6b | ||
|
|
a9390d96a1 | ||
|
|
8ee9621d66 | ||
|
|
49f2123893 | ||
|
|
cf72129852 | ||
|
|
8edee8155d | ||
|
|
c262b272fa | ||
|
|
9ef9c1c58a | ||
|
|
c7ff79a652 | ||
|
|
da81df5284 | ||
|
|
a4420dc88b | ||
|
|
eeb8338dce | ||
|
|
dfa4ac81fd | ||
|
|
ea16dca8aa | ||
|
|
306632b29a | ||
|
|
4533ed014f | ||
|
|
68cc4186ad | ||
|
|
9a4e749c7c | ||
|
|
55c645c614 | ||
|
|
a1024bb365 | ||
|
|
dfc82c3ba4 | ||
|
|
9e27a8aad0 | ||
|
|
c73111afea | ||
|
|
26a64afd8d | ||
|
|
78a3f081de | ||
|
|
e8f8a49646 | ||
|
|
219304c5ee | ||
|
|
f3fd312b83 | ||
|
|
357e66d64d | ||
|
|
4fa1ea8c4b | ||
|
|
3b81cd462d | ||
|
|
14acf05a26 | ||
|
|
58d9c84bc9 | ||
|
|
7e39d9ad3d | ||
|
|
a4edb3dab1 | ||
|
|
ed409d0460 | ||
|
|
50b45ac2da | ||
|
|
29bcbc68c5 | ||
|
|
affbe9ac7d | ||
|
|
1790fa452f | ||
|
|
607a246572 | ||
|
|
4f1b06e6b2 | ||
|
|
62e9a33a70 | ||
|
|
3298f935ef | ||
|
|
0e8f56c752 | ||
|
|
8224538372 | ||
|
|
fbf6eef68f | ||
|
|
f078d156de | ||
|
|
23d6eed5ea | ||
|
|
0ed3d118d6 | ||
|
|
337f048864 | ||
|
|
6f3c421621 | ||
|
|
eadd68d40b | ||
|
|
71202e3cd5 | ||
|
|
75008d8f11 | ||
|
|
2da0ecbe3c | ||
|
|
c7f814b2dc | ||
|
|
13a4a05388 | ||
|
|
20c019ae16 | ||
|
|
d9d6571c73 | ||
|
|
540cad4844 | ||
|
|
0a26b650c0 | ||
|
|
adaac003e5 | ||
|
|
3d4f125071 | ||
|
|
bce87f8717 | ||
|
|
1fe940bd6b | ||
|
|
cb36a71381 | ||
|
|
5acc4928fe | ||
|
|
434493b8aa | ||
|
|
f08b25dbb2 | ||
|
|
3665734972 | ||
|
|
a98d78cdea | ||
|
|
80f6d74e80 | ||
|
|
02d926e9bd | ||
|
|
7749692f72 | ||
|
|
7807cbeb39 | ||
|
|
72f231b327 | ||
|
|
3cbe97d346 | ||
|
|
b880e1a60e | ||
|
|
886046e696 | ||
|
|
9106a5f8ae | ||
|
|
98286336bf | ||
|
|
fa0deededa | ||
|
|
081b001c8b | ||
|
|
c92531a02f | ||
|
|
748a7af602 | ||
|
|
f4a0de6327 | ||
|
|
e405d7af9f | ||
|
|
51cd7fd285 | ||
|
|
aba5f89174 | ||
|
|
5c0f5a1613 | ||
|
|
7c342f7ba2 | ||
|
|
37e2388758 | ||
|
|
05f0492a8d | ||
|
|
c0ac5c6ae8 | ||
|
|
be923687fb | ||
|
|
5f32fb125d | ||
|
|
ae6fbb3146 | ||
|
|
864768635a | ||
|
|
d7c9679977 | ||
|
|
fedfc366f6 | ||
|
|
b3b39626e1 | ||
|
|
4e0ece17b6 | ||
|
|
fd3fdacdee | ||
|
|
a253606d50 | ||
|
|
568d9dc0a3 | ||
|
|
6629b853c5 | ||
|
|
3931cb3235 | ||
|
|
38cd86ad52 | ||
|
|
c0cdabf61d | ||
|
|
51270a96c5 | ||
|
|
84d72c0d5c | ||
|
|
79aca8169a | ||
|
|
b9d362bd62 | ||
|
|
87c4a1bee1 | ||
|
|
c979762b70 | ||
|
|
1d92fc3199 | ||
|
|
8ac7fb1a67 | ||
|
|
60c3d33def | ||
|
|
8a39d3f4eb | ||
|
|
e038767b6f | ||
|
|
0c46b3e481 | ||
|
|
d42f072ff5 | ||
|
|
9b6f29c24a | ||
|
|
873d5dc23f | ||
|
|
6d141fd47f | ||
|
|
c6f6cb2947 | ||
|
|
0eb189ce7f | ||
|
|
f4fd7b7028 | ||
|
|
21de8e0a35 | ||
|
|
6f55d494bd | ||
|
|
d216edc567 | ||
|
|
ec6063ecc4 | ||
|
|
40fe4ce6fb | ||
|
|
31d87a4048 | ||
|
|
ac8b171fa9 | ||
|
|
1f06d78213 | ||
|
|
28eba17df8 | ||
|
|
dfc2e62339 | ||
|
|
80c89a39c9 | ||
|
|
9d1c16e996 | ||
|
|
86604c2353 | ||
|
|
8f31a02938 | ||
|
|
47d375309d | ||
|
|
980265ca97 | ||
|
|
90479fff95 | ||
|
|
1ce1fcb0ce | ||
|
|
1a662376fc | ||
|
|
1d24f926ec | ||
|
|
4f2c37c940 | ||
|
|
042115a6bb | ||
|
|
c9f1469b41 | ||
|
|
54c9f604c9 | ||
|
|
56fbcd6562 | ||
|
|
e6b0500568 | ||
|
|
41038b6673 | ||
|
|
26d03f26c9 | ||
|
|
f3a4e54996 | ||
|
|
925e80bb20 | ||
|
|
9bda09b1a8 | ||
|
|
ef0d0531fa | ||
|
|
6520f20ffe | ||
|
|
ebc4e0924b | ||
|
|
9e7c0e6033 | ||
|
|
cf5720f316 | ||
|
|
655b468269 | ||
|
|
17f8c93e44 | ||
|
|
5b4061b0d5 | ||
|
|
6ce0227e98 | ||
|
|
a583a28850 | ||
|
|
32daf65adc | ||
|
|
e22c80610e | ||
|
|
374f1e7e01 | ||
|
|
d2dfa93bf1 | ||
|
|
fa8c6712c6 | ||
|
|
4c2b84cb4d | ||
|
|
b57c9d569b | ||
|
|
f0e50ba000 | ||
|
|
4a6638f749 | ||
|
|
31577252f3 | ||
|
|
5d71c50080 | ||
|
|
981269d594 | ||
|
|
848db985fc | ||
|
|
d5d8e31447 | ||
|
|
66670a2370 | ||
|
|
5637f349c6 | ||
|
|
93248e1d00 | ||
|
|
187769357f | ||
|
|
5be6422cc8 | ||
|
|
8670b2d994 | ||
|
|
0bc6db428d | ||
|
|
67d565930e | ||
|
|
b2a7ff6fd3 | ||
|
|
425a730d7c | ||
|
|
84c5709722 | ||
|
|
94deec01c9 | ||
|
|
6e0dd4a779 | ||
|
|
14bde340dd | ||
|
|
253765c611 | ||
|
|
2b26d7182f | ||
|
|
61ac83e2d9 | ||
|
|
d5c7b28cad | ||
|
|
959580a708 | ||
|
|
3a5cd17ea3 | ||
|
|
b78981bb9d | ||
|
|
a6d90b0a00 | ||
|
|
67016492f2 | ||
|
|
2c38089527 | ||
|
|
48f68ba6dc | ||
|
|
574df4ba3d | ||
|
|
49ca16d125 | ||
|
|
87525b085e | ||
|
|
6b53c6add3 | ||
|
|
29ca1b7855 | ||
|
|
a42d0c9907 | ||
|
|
8bc6ceaa3d | ||
|
|
0b8a1ab5d1 | ||
|
|
358c287db2 | ||
|
|
2e68453655 | ||
|
|
89b8a9de7d | ||
|
|
c4c2058df9 | ||
|
|
0d85c0085f | ||
|
|
6fa8a8f84f | ||
|
|
a97775bff3 | ||
|
|
32640e054d | ||
|
|
aa42da5658 | ||
|
|
900a94a825 | ||
|
|
c37552de70 | ||
|
|
916b37926c | ||
|
|
2b76c3c15a | ||
|
|
cedd7dde18 | ||
|
|
d088608d8e | ||
|
|
06ee29bb8b | ||
|
|
d255e954d6 | ||
|
|
6a7ab6b8ac | ||
|
|
45b18cc0b1 | ||
|
|
0479431f0a | ||
|
|
ec58dbd791 | ||
|
|
91de68aab3 | ||
|
|
85efc30145 | ||
|
|
0032594f21 | ||
|
|
829fdc5679 | ||
|
|
22e176e329 | ||
|
|
826a70a137 | ||
|
|
dd0ea674af | ||
|
|
a4761b8921 | ||
|
|
3958bb7903 | ||
|
|
83a037a7ce | ||
|
|
a3eb8337a6 | ||
|
|
541072f8e0 | ||
|
|
881248cbd6 | ||
|
|
d4979f5e64 | ||
|
|
4133cd03bb | ||
|
|
9f07c3ca27 | ||
|
|
b20bacb9ed | ||
|
|
97cfbfee1d | ||
|
|
fa7c941792 | ||
|
|
4738879f32 | ||
|
|
d5d88f756a | ||
|
|
65b136bf15 | ||
|
|
bee0b238e4 | ||
|
|
c891168ffb | ||
|
|
6376c2f6aa | ||
|
|
4d9b7cdd61 | ||
|
|
8263d1dd6f | ||
|
|
faf41c0b36 | ||
|
|
27a09c0b2c | ||
|
|
3db7f6a284 | ||
|
|
3bfeb5b5ef | ||
|
|
62a7a555b5 | ||
|
|
d60e99a043 | ||
|
|
77723b34c7 | ||
|
|
c466d34a06 | ||
|
|
f816897833 | ||
|
|
c1e8a5e522 | ||
|
|
76aca32f2e | ||
|
|
7e31b2a795 | ||
|
|
028e38a86b | ||
|
|
8cf7649855 | ||
|
|
64f5119b08 | ||
|
|
4d606aefb3 | ||
|
|
4bafdaa04d | ||
|
|
5afe1abf82 | ||
|
|
f066d50b98 | ||
|
|
91103e21cc | ||
|
|
f44dabcd65 | ||
|
|
0fd2fca231 | ||
|
|
5bb64098e7 | ||
|
|
3fc85e75e0 | ||
|
|
3f61ea16b7 | ||
|
|
4b393092b5 | ||
|
|
b583f5162b | ||
|
|
060a22f395 | ||
|
|
d3e85355f1 | ||
|
|
83e730b768 | ||
|
|
5fcc96446c | ||
|
|
ad88925154 | ||
|
|
0a6ddbf15c | ||
|
|
08e0722d97 | ||
|
|
05d4fba551 | ||
|
|
f41c2b3c9f | ||
|
|
69f64899fe | ||
|
|
33f0865430 | ||
|
|
ad5b9202ab | ||
|
|
1676693091 | ||
|
|
0852b50b8f | ||
|
|
eb998aa502 | ||
|
|
6dab0e9de7 | ||
|
|
95ff1d141c | ||
|
|
87bc8a9da6 | ||
|
|
087fe9a537 | ||
|
|
c1170260b5 | ||
|
|
65cdf50774 | ||
|
|
9233bb490c | ||
|
|
43932220f7 | ||
|
|
cea4d1894e | ||
|
|
80baa0358d | ||
|
|
5d73db53a0 | ||
|
|
302ea90dce | ||
|
|
37b04ed283 | ||
|
|
be6995cfdf | ||
|
|
dfbc11300c | ||
|
|
82d539d174 | ||
|
|
6e00f31014 | ||
|
|
a46ac3cc92 | ||
|
|
6fbf98d8e2 | ||
|
|
f094c42728 | ||
|
|
13827e1282 | ||
|
|
32170b47d9 | ||
|
|
09c05354c2 | ||
|
|
b0b1475563 | ||
|
|
b85dd7283a | ||
|
|
846ae765e5 | ||
|
|
4c629e538e | ||
|
|
f6e22bb3b9 | ||
|
|
46a048d7f6 | ||
|
|
bd9f4eea06 | ||
|
|
0a672e61e2 | ||
|
|
29a8530221 | ||
|
|
3e738642a7 | ||
|
|
f551f55f03 | ||
|
|
9f012c8002 | ||
|
|
0a69a9e5ef | ||
|
|
194790183a | ||
|
|
2227721173 | ||
|
|
77a53da5f5 | ||
|
|
ab63ff275d | ||
|
|
e5363f65f0 | ||
|
|
ffc157de65 | ||
|
|
f9fdadb4c0 | ||
|
|
4efccb79f2 | ||
|
|
337968199a | ||
|
|
37027f68cb | ||
|
|
d1b62c5495 | ||
|
|
355fe01cb7 | ||
|
|
9d050a16c7 | ||
|
|
fa53c67606 | ||
|
|
5006376fe6 | ||
|
|
2204b8e205 | ||
|
|
270007b17c | ||
|
|
568eb2ef4c | ||
|
|
73ca9184a8 | ||
|
|
5e8e11e16e | ||
|
|
029bbc16f2 | ||
|
|
9e3d87e4f6 | ||
|
|
f1410a1127 | ||
|
|
2b980d16c3 | ||
|
|
b2b97aafb8 | ||
|
|
da2082b025 | ||
|
|
327ea9d547 | ||
|
|
b23db4a202 | ||
|
|
d1a36004ab | ||
|
|
6071920c45 | ||
|
|
5f539e1fba | ||
|
|
8e1539c360 | ||
|
|
065cfb2aca | ||
|
|
3147534e86 | ||
|
|
be5603bf16 | ||
|
|
b9b0bcdcbd | ||
|
|
5bcece56f3 | ||
|
|
d67faef88c | ||
|
|
8f6db5e905 | ||
|
|
82e93a0560 | ||
|
|
a9a82c083b | ||
|
|
974d9c33ed | ||
|
|
c1957ab694 | ||
|
|
b20a10a4bc | ||
|
|
be14ce465d | ||
|
|
d1ca0c5614 | ||
|
|
535514f506 | ||
|
|
933b63cf13 | ||
|
|
d7c3e380a5 | ||
|
|
c5298f78cb | ||
|
|
4f8f7b8d1d | ||
|
|
d7d46919ac | ||
|
|
e5d73d2e2e | ||
|
|
b145e8ec90 | ||
|
|
97ff4a1fb8 | ||
|
|
5018a552c1 | ||
|
|
7f9fd9ffce | ||
|
|
ddd0ca6a8f | ||
|
|
06f817c7e3 | ||
|
|
df4c3e56c4 | ||
|
|
9d5c2b9656 | ||
|
|
7ce59c5e2e | ||
|
|
1c9631fc78 | ||
|
|
efbe7297f7 | ||
|
|
1b45946a61 | ||
|
|
cbf5a6362c | ||
|
|
583b96c341 | ||
|
|
fc0920504d | ||
|
|
abd65a93b2 | ||
|
|
c3244fdd7a | ||
|
|
e8f58938b0 | ||
|
|
602b4f34b1 | ||
|
|
0399c84dfa | ||
|
|
fd5d879bf5 | ||
|
|
8dff460307 | ||
|
|
cce1ddb183 | ||
|
|
8691d14289 | ||
|
|
dd402da9e5 | ||
|
|
2fd04248f1 | ||
|
|
0ac42006f8 | ||
|
|
66e331248d | ||
|
|
4be3e8c87d | ||
|
|
dac033fe61 | ||
|
|
d302cbb114 | ||
|
|
e3b407db28 | ||
|
|
4ef623f09e | ||
|
|
253530a63d | ||
|
|
4f38d989f5 | ||
|
|
84074e90ee | ||
|
|
38aee7d8f2 | ||
|
|
64198313c6 | ||
|
|
d61b6c301c | ||
|
|
83d1931266 | ||
|
|
c31f2ab285 | ||
|
|
0ddc5721b4 | ||
|
|
98bd183bc4 | ||
|
|
aaa154524c | ||
|
|
beced68337 | ||
|
|
94823ab952 | ||
|
|
0b6a19802f | ||
|
|
c4a2d2197c | ||
|
|
269d06aa15 | ||
|
|
dfef1f2c54 | ||
|
|
b62beaba0b | ||
|
|
adf414e40f | ||
|
|
dc64e57f63 | ||
|
|
d3e410b2ac | ||
|
|
c544b2474b | ||
|
|
18243de358 | ||
|
|
6625895d1f | ||
|
|
f9ecce739e | ||
|
|
0075dd8386 | ||
|
|
eef1cde816 | ||
|
|
8d867c30c6 | ||
|
|
42c668b7ae | ||
|
|
b62227b4ae | ||
|
|
25ef0cb87b | ||
|
|
e195941aa5 | ||
|
|
e09eef1dd7 | ||
|
|
7c13663a4e | ||
|
|
5753869e5e | ||
|
|
ba878a19f4 | ||
|
|
55a9de78cd | ||
|
|
ff51fc9091 | ||
|
|
a4f857ee34 | ||
|
|
3250d74bef | ||
|
|
c086160239 | ||
|
|
6cdccaff53 | ||
|
|
a9ab8de25d | ||
|
|
2a29cb18a5 | ||
|
|
4193a4f415 | ||
|
|
0226ec450a | ||
|
|
020b8ebb35 | ||
|
|
1170b30c1b | ||
|
|
0004d4a906 | ||
|
|
cb27e86266 | ||
|
|
77a3b2ea5c | ||
|
|
099e65f3b6 | ||
|
|
befb8db120 | ||
|
|
9992d826b1 | ||
|
|
18604e1a39 | ||
|
|
312c569182 | ||
|
|
b43e0ed130 | ||
|
|
289debea34 | ||
|
|
ccd6af7016 | ||
|
|
effc69e4e4 | ||
|
|
c7a0d0db64 | ||
|
|
50d69a1ca4 | ||
|
|
8a6b8fe70a | ||
|
|
c4e53aea71 | ||
|
|
ad5125e93f | ||
|
|
8d92cbac93 | ||
|
|
0225443ec8 | ||
|
|
71e1d0a334 | ||
|
|
83f69e02fd | ||
|
|
e1b2da1ff0 | ||
|
|
5eb1b90a4b | ||
|
|
9c4ee74b91 | ||
|
|
f65f566829 | ||
|
|
c8ad3123b7 | ||
|
|
8cefce28cf | ||
|
|
a834d26885 | ||
|
|
810e3cd551 | ||
|
|
f258fa96cd | ||
|
|
757ec61f14 | ||
|
|
2c933f43d8 | ||
|
|
cc5bfa8af8 | ||
|
|
de9f3e55f1 | ||
|
|
ed0c986218 | ||
|
|
72c27215b6 | ||
|
|
c23b14f768 | ||
|
|
81282f9c4d | ||
|
|
2b324f6f81 | ||
|
|
049f110344 | ||
|
|
448a0307a8 | ||
|
|
7390e42f5c | ||
|
|
ee880d229f | ||
|
|
9cd07d81f8 | ||
|
|
b453d089c3 | ||
|
|
7410fe1d1e | ||
|
|
6323a77431 | ||
|
|
0aedaa8553 | ||
|
|
6554479d39 | ||
|
|
ce2ebd3198 | ||
|
|
13ea1efc96 | ||
|
|
ef380321cf | ||
|
|
294b037730 | ||
|
|
7603996612 | ||
|
|
3048d2b0b1 | ||
|
|
0bb47a09d2 | ||
|
|
1afe6901d9 | ||
|
|
3e019fb512 | ||
|
|
e069aa9608 | ||
|
|
0b32e42d25 | ||
|
|
8d18be5069 | ||
|
|
e715d99d0c | ||
|
|
dc28590247 | ||
|
|
139f158ea1 | ||
|
|
4b2a18837f | ||
|
|
b4340d0185 | ||
|
|
90d11398e6 | ||
|
|
bf8c73b25b | ||
|
|
21cd21de1b | ||
|
|
c25f6e56e7 | ||
|
|
a1f1d1995c | ||
|
|
390582d7f3 | ||
|
|
e765a29ca2 | ||
|
|
cf5c244487 | ||
|
|
a5eb30a93d | ||
|
|
ac7bc35944 | ||
|
|
ddfd721f6e | ||
|
|
aee3916cd1 | ||
|
|
3eff1e559b | ||
|
|
1a542c91fa | ||
|
|
cd60a84f8a | ||
|
|
3dd4bac6e6 | ||
|
|
06ff9cfede | ||
|
|
2d1ed9a304 | ||
|
|
50b51c05f6 | ||
|
|
5ce4b8dd5b | ||
|
|
2f4467b5a5 | ||
|
|
e91ab54a69 | ||
|
|
6a33432c82 | ||
|
|
135654a080 | ||
|
|
7b708a2bee | ||
|
|
b515c28417 | ||
|
|
854ffb0323 | ||
|
|
891b7b22ea | ||
|
|
c8d37a7227 | ||
|
|
489060881d | ||
|
|
d56a4cce1b | ||
|
|
7eb9dfde38 | ||
|
|
571e10f83e | ||
|
|
af202d4fe5 | ||
|
|
4057fbbcfd | ||
|
|
5cdb8a79a1 | ||
|
|
a674b43243 | ||
|
|
ac41f13b7c | ||
|
|
003b9887b1 | ||
|
|
ba45c2ab5b | ||
|
|
9d36a48a80 | ||
|
|
20a525635e | ||
|
|
659eceea95 | ||
|
|
d462c03d00 | ||
|
|
6591e07eb4 | ||
|
|
fe71825954 | ||
|
|
43516f84fe | ||
|
|
0849edb00b | ||
|
|
dd3b4083eb | ||
|
|
89673a4040 | ||
|
|
410dbd3dfc | ||
|
|
7085b1ea3f | ||
|
|
8683cae719 | ||
|
|
0197efa524 | ||
|
|
16e76caa33 | ||
|
|
1f5240694d | ||
|
|
f087151db7 | ||
|
|
0b691ff597 | ||
|
|
ae049961b7 | ||
|
|
0d6eee705f | ||
|
|
58d20ec9dc | ||
|
|
38befe1dc1 | ||
|
|
2f335100a5 | ||
|
|
3fef818843 | ||
|
|
428c8af77e | ||
|
|
54fccd2e25 | ||
|
|
66c6a5dc0f | ||
|
|
92561ae19d | ||
|
|
b85e93410b | ||
|
|
593993ba97 | ||
|
|
7b8b606278 | ||
|
|
7116ad0607 | ||
|
|
c507044277 | ||
|
|
5f45a9d90f | ||
|
|
e31e87aabd | ||
|
|
2957416d90 | ||
|
|
b9b761b67a | ||
|
|
a7539e9317 | ||
|
|
75575c0c68 | ||
|
|
77b3e08214 | ||
|
|
956b783c1a | ||
|
|
e90c080470 | ||
|
|
37aabaa03a | ||
|
|
3e289a7bef | ||
|
|
6dd5e3fdf5 | ||
|
|
e60df3c7c0 | ||
|
|
42f772beed | ||
|
|
3655c4a0fc | ||
|
|
012dbffd94 | ||
|
|
4b39efeee3 | ||
|
|
19caf750fd | ||
|
|
296611714f | ||
|
|
4c3d19cc8b | ||
|
|
a3ba07c7a3 | ||
|
|
a1579808b2 | ||
|
|
aecb9f5816 | ||
|
|
a5d42a526c | ||
|
|
a9472f8116 | ||
|
|
b19243ab75 | ||
|
|
2bf094b950 | ||
|
|
d5f106ae19 | ||
|
|
920745345a | ||
|
|
143033d7db | ||
|
|
335990c145 | ||
|
|
6d24e836b0 | ||
|
|
278a2fed56 | ||
|
|
c444004eec | ||
|
|
72cf7896d7 | ||
|
|
31af5f8177 | ||
|
|
6a68d9a57e | ||
|
|
39f41ab25e | ||
|
|
624cc1e987 | ||
|
|
08a15e5cdd | ||
|
|
4cd4787e4d | ||
|
|
65afee2808 | ||
|
|
00ece864ec | ||
|
|
6d6d9bea5a | ||
|
|
7c213f8533 | ||
|
|
3685c19b2d | ||
|
|
650a2b4da4 | ||
|
|
afea6f38f6 | ||
|
|
c45d428551 | ||
|
|
4e594aa9b0 | ||
|
|
32f91c5f31 | ||
|
|
a32ece897a | ||
|
|
88f6436aaa | ||
|
|
fac43cea06 | ||
|
|
a9e6aeed54 | ||
|
|
fa9f49f5bb | ||
|
|
2a6183aba5 | ||
|
|
b1a622971b | ||
|
|
5b72faccb4 | ||
|
|
c8732544c7 | ||
|
|
d4219b16b8 | ||
|
|
0c33432f64 | ||
|
|
95bd58cced | ||
|
|
8d7d1a7e24 | ||
|
|
3768cb2f2c | ||
|
|
d4b2741608 | ||
|
|
aef2152dcc | ||
|
|
d0b0221b97 | ||
|
|
b4758cd989 | ||
|
|
681250f114 | ||
|
|
fd13d3c50e | ||
|
|
674b8bb0cd | ||
|
|
5d9a962146 | ||
|
|
e130aada72 | ||
|
|
76709a9a39 | ||
|
|
acd2d55b84 | ||
|
|
fcec0eb812 | ||
|
|
e9965347b5 | ||
|
|
5a83f75e0d | ||
|
|
91c706a201 | ||
|
|
34384881bc | ||
|
|
71ba28753e | ||
|
|
32d2f0db66 | ||
|
|
e1169a4e82 | ||
|
|
0e5711e62d | ||
|
|
0ddfa3de5b | ||
|
|
661aa79b7c | ||
|
|
2c32cc2f27 | ||
|
|
d7bb0bc5cb | ||
|
|
d5644c3ab9 | ||
|
|
09ab8e3efd | ||
|
|
2f683529ec | ||
|
|
6ac012a82b | ||
|
|
075194cb54 | ||
|
|
269f070051 | ||
|
|
3342c9d7c2 | ||
|
|
b468b2f926 | ||
|
|
af1c7d0023 | ||
|
|
34670eef79 | ||
|
|
979739c1b7 | ||
|
|
83ed6870b9 | ||
|
|
57a568986a | ||
|
|
e828e26b5b | ||
|
|
825738440e | ||
|
|
147bd1a075 | ||
|
|
209e97f372 | ||
|
|
47f8627432 | ||
|
|
cc6713837a | ||
|
|
728fe0ad88 | ||
|
|
dbba45349f | ||
|
|
40ccf46b4b | ||
|
|
077bb9f20a | ||
|
|
e4c990c677 | ||
|
|
1c8b9d813a | ||
|
|
83812f2671 | ||
|
|
4053c33899 | ||
|
|
03978b63bc | ||
|
|
bf036be6b8 | ||
|
|
7ffb10d7f5 | ||
|
|
66377954cb | ||
|
|
e507686cef | ||
|
|
e5ddaf14f4 | ||
|
|
cf597a2f6b | ||
|
|
d83f0aabca | ||
|
|
b337e984b3 | ||
|
|
6366ee072e | ||
|
|
c3bfcbd562 | ||
|
|
c0d5054798 | ||
|
|
810dc30d3d | ||
|
|
36dd4933e9 | ||
|
|
435fffe1b0 | ||
|
|
2b8f1c4cda | ||
|
|
0e8c7a9b28 | ||
|
|
3e13678f23 | ||
|
|
455ec4f1fd | ||
|
|
8dc81042c3 | ||
|
|
c77db79447 | ||
|
|
de65028061 | ||
|
|
d66a795413 | ||
|
|
34762bf604 | ||
|
|
57121338b1 | ||
|
|
a5d246ec0c | ||
|
|
f2cefeeedc | ||
|
|
537e72a05f | ||
|
|
efa5a061d7 | ||
|
|
0bef44c2ff | ||
|
|
f62fe059b1 | ||
|
|
f432e2b17e | ||
|
|
8c877d7d8e | ||
|
|
dc9377fb92 | ||
|
|
7384b63b1d | ||
|
|
ba6ecf541f | ||
|
|
94e5709d58 | ||
|
|
add8d3cbaf | ||
|
|
1a42188bce | ||
|
|
0da427e127 | ||
|
|
9447b32f3e | ||
|
|
af10adb7fe | ||
|
|
129acf886f | ||
|
|
9af3e1efac | ||
|
|
9e22a8b4ff | ||
|
|
28da747f19 | ||
|
|
3d6783ddb0 | ||
|
|
349fc526d7 | ||
|
|
acf6dc0a30 | ||
|
|
3563e66ff6 | ||
|
|
8965ff27ec | ||
|
|
86feb1e104 | ||
|
|
f6257a86d3 | ||
|
|
bd04ea8aca | ||
|
|
754c1c6775 | ||
|
|
0b01eb5a11 | ||
|
|
6247b9df39 | ||
|
|
bd5344c892 | ||
|
|
e4fe54cd7f | ||
|
|
97f9e9b042 | ||
|
|
3668eb1606 | ||
|
|
e23addcc02 | ||
|
|
5147f4086e | ||
|
|
fb3c2de83f | ||
|
|
107817317c | ||
|
|
663ff3417c | ||
|
|
2b19d6bbac |
@@ -1,4 +1,4 @@
|
||||
name: lint
|
||||
name: format
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -12,12 +12,12 @@ on:
|
||||
- "docs/**"
|
||||
|
||||
concurrency:
|
||||
group: build-lint-${{ github.event.pull_request.number || github.ref }}
|
||||
group: build-format-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
autopep8:
|
||||
name: "Formatting lints"
|
||||
ruff-format:
|
||||
name: "Formatting checker"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
@@ -25,7 +25,7 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.10'
|
||||
python-version: "3.10"
|
||||
- name: Setup virtual environment
|
||||
run: |
|
||||
python -m venv .venv
|
||||
@@ -34,11 +34,8 @@ jobs:
|
||||
source .venv/bin/activate
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r dev-requirements.txt
|
||||
- name: autopep8
|
||||
id: autopep8
|
||||
- name: Ruff formatter
|
||||
id: ruff
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
autopep8 --max-line-length 100 --exit-code -r -d --exclude "*_pb2.py" -a -a src/
|
||||
- name: Fail if autopep8 requires changes
|
||||
if: steps.autopep8.outputs.exit-code == 2
|
||||
run: exit 1
|
||||
ruff format --config line-length=100 --diff --exclude "*_pb2.py"
|
||||
9
.github/workflows/publish_test.yaml
vendored
9
.github/workflows/publish_test.yaml
vendored
@@ -1,10 +1,6 @@
|
||||
name: publish-test
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
on: workflow_dispatch
|
||||
|
||||
jobs:
|
||||
build:
|
||||
@@ -14,7 +10,6 @@ jobs:
|
||||
- name: Checkout repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ github.event.inputs.gitref }}
|
||||
fetch-tags: true
|
||||
fetch-depth: 100
|
||||
- name: Set up Python
|
||||
@@ -40,7 +35,7 @@ jobs:
|
||||
name: wheels
|
||||
path: ./dist
|
||||
|
||||
publish-to-pypi:
|
||||
publish-to-test-pypi:
|
||||
name: "Publish to Test PyPI"
|
||||
runs-on: ubuntu-latest
|
||||
needs: [ build ]
|
||||
|
||||
19
.github/workflows/tests.yaml
vendored
19
.github/workflows/tests.yaml
vendored
@@ -20,21 +20,24 @@ jobs:
|
||||
name: "Unit and Integration Tests"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Checkout repo
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up Python
|
||||
id: setup_python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.10'
|
||||
python-version: "3.10"
|
||||
- name: Cache virtual environment
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
# We are hashing requirements-dev.txt and requirements-extra.txt which
|
||||
# contain all dependencies needed to run the tests and examples.
|
||||
key: venv-${{ runner.os }}-${{ steps.setup_python.outputs.python-version}}-${{ hashFiles('linux-py3.10-requirements.txt') }}-${{ hashFiles('dev-requirements.txt') }}
|
||||
# We are hashing dev-requirements.txt and test-requirements.txt which
|
||||
# contain all dependencies needed to run the tests.
|
||||
key: venv-${{ runner.os }}-${{ steps.setup_python.outputs.python-version}}-${{ hashFiles('dev-requirements.txt') }}-${{ hashFiles('test-requirements.txt') }}
|
||||
path: .venv
|
||||
- name: Install system packages
|
||||
run: sudo apt-get install -y portaudio19-dev
|
||||
id: install_system_packages
|
||||
run: |
|
||||
sudo apt-get install -y portaudio19-dev
|
||||
- name: Setup virtual environment
|
||||
run: |
|
||||
python -m venv .venv
|
||||
@@ -42,8 +45,8 @@ jobs:
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r linux-py3.10-requirements.txt -r dev-requirements.txt
|
||||
pip install -r dev-requirements.txt -r test-requirements.txt
|
||||
- name: Test with pytest
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
pytest --doctest-modules --ignore-glob="*to_be_updated*" src tests
|
||||
pytest --ignore-glob="*to_be_updated*" --ignore-glob=*pipeline_source* src tests
|
||||
|
||||
1041
CHANGELOG.md
Normal file
1041
CHANGELOG.md
Normal file
File diff suppressed because it is too large
Load Diff
62
CHANGELOG.md.template
Normal file
62
CHANGELOG.md.template
Normal file
@@ -0,0 +1,62 @@
|
||||
# Changelog
|
||||
|
||||
All notable changes to the **<project name>** SDK will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
Please make sure to add your changes to the appropriate categories:
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
|
||||
<!-- for new functionality -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Changed
|
||||
|
||||
<!-- for changed functionality -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Deprecated
|
||||
|
||||
<!-- for soon-to-be removed functionality -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Removed
|
||||
|
||||
<!-- for removed functionality -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Fixed
|
||||
|
||||
<!-- for fixed bugs -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Performance
|
||||
|
||||
<!-- for performance-relevant changes -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Security
|
||||
|
||||
<!-- for security-relevant changes -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Other
|
||||
|
||||
<!-- for everything else -->
|
||||
|
||||
- n/a
|
||||
|
||||
## [0.1.0] - YYYY-MM-DD
|
||||
|
||||
Initial release.
|
||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
||||
BSD 2-Clause License
|
||||
|
||||
Copyright (c) 2024, Kwindla Hultman Kramer
|
||||
Copyright (c) 2024, Daily
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
64
README.md
64
README.md
@@ -4,8 +4,7 @@
|
||||
|
||||
# Pipecat
|
||||
|
||||
[](https://pypi.org/project/pipecat-ai) [](https://discord.gg/pipecat)
|
||||
[](https://pypi.org/project/pipecat-ai) [](https://discord.gg/pipecat) <a href="https://app.commanddash.io/agent/github_pipecat-ai_pipecat"><img src="https://img.shields.io/badge/AI-Code%20Agent-EB9FDA"></a>
|
||||
|
||||
`pipecat` is a framework for building voice (and multimodal) conversational agents. Things like personal coaches, meeting assistants, [story-telling toys for kids](https://storytelling-chatbot.fly.dev/), customer support bots, [intake flows](https://www.youtube.com/watch?v=lDevgsp9vn0), and snarky social companions.
|
||||
|
||||
@@ -39,7 +38,7 @@ pip install "pipecat-ai[option,...]"
|
||||
|
||||
Your project may or may not need these, so they're made available as optional requirements. Here is a list:
|
||||
|
||||
- **AI services**: `anthropic`, `azure`, `fal`, `moondream`, `openai`, `playht`, `silero`, `whisper`
|
||||
- **AI services**: `anthropic`, `aws`, `azure`, `deepgram`, `gladia`, `google`, `fal`, `lmnt`, `moondream`, `openai`, `openpipe`, `playht`, `silero`, `whisper`, `xtts`
|
||||
- **Transports**: `local`, `websocket`, `daily`
|
||||
|
||||
## Code examples
|
||||
@@ -49,7 +48,7 @@ Your project may or may not need these, so they're made available as optional re
|
||||
|
||||
## A simple voice agent running locally
|
||||
|
||||
Here is a very basic Pipecat bot that greets a user when they join a real-time session. We'll use [Daily](https://daily.co) for real-time media transport, and [ElevenLabs](https://elevenlabs.io/) for text-to-speech.
|
||||
Here is a very basic Pipecat bot that greets a user when they join a real-time session. We'll use [Daily](https://daily.co) for real-time media transport, and [Cartesia](https://cartesia.ai/) for text-to-speech.
|
||||
|
||||
```python
|
||||
#app.py
|
||||
@@ -61,7 +60,7 @@ from pipecat.frames.frames import EndFrame, TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
async def main():
|
||||
@@ -70,14 +69,13 @@ async def main():
|
||||
transport = DailyTransport(
|
||||
room_url=...,
|
||||
token=...,
|
||||
"Bot Name",
|
||||
DailyParams(audio_out_enabled=True))
|
||||
bot_name="Bot Name",
|
||||
params=DailyParams(audio_out_enabled=True))
|
||||
|
||||
# Use Eleven Labs for Text-to-Speech
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=...,
|
||||
voice_id=...,
|
||||
# Use Cartesia for Text-to-Speech
|
||||
tts = CartesiaTTSService(
|
||||
api_key=...,
|
||||
voice_id=...
|
||||
)
|
||||
|
||||
# Simple pipeline that will process text to speech and output the result
|
||||
@@ -94,7 +92,7 @@ async def main():
|
||||
@transport.event_handler("on_participant_joined")
|
||||
async def on_new_participant_joined(transport, participant):
|
||||
participant_name = participant["info"]["userName"] or ''
|
||||
# Queue a TextFrame that will get spoken by the TTS service (Eleven Labs)
|
||||
# Queue a TextFrame that will get spoken by the TTS service (Cartesia)
|
||||
await task.queue_frames([TextFrame(f"Hello there, {participant_name}!"), EndFrame()])
|
||||
|
||||
# Run the pipeline task
|
||||
@@ -112,7 +110,6 @@ python app.py
|
||||
|
||||
Daily provides a prebuilt WebRTC user interface. Whilst the app is running, you can visit at `https://<yourdomain>.daily.co/<room_url>` and listen to the bot say hello!
|
||||
|
||||
|
||||
## WebRTC for production use
|
||||
|
||||
WebSockets are fine for server-to-server communication or for initial development. But for production use, you’ll need client-server audio to use a protocol designed for real-time media transport. (For an explanation of the difference between WebSockets and WebRTC, see [this post.](https://www.daily.co/blog/how-to-talk-to-an-llm-with-your-voice/#webrtc))
|
||||
@@ -125,7 +122,7 @@ Sign up [here](https://dashboard.daily.co/u/signup) and [create a room](https://
|
||||
|
||||
Voice Activity Detection — very important for knowing when a user has finished speaking to your bot. If you are not using press-to-talk, and want Pipecat to detect when the user has finished talking, VAD is an essential component for a natural feeling conversation.
|
||||
|
||||
Pipecast makes use of WebRTC VAD by default when using a WebRTC transport layer. Optionally, you can use Silero VAD for improved accuracy at the cost of higher CPU usage.
|
||||
Pipecat makes use of WebRTC VAD by default when using a WebRTC transport layer. Optionally, you can use Silero VAD for improved accuracy at the cost of higher CPU usage.
|
||||
|
||||
```shell
|
||||
pip install pipecat-ai[silero]
|
||||
@@ -133,7 +130,6 @@ pip install pipecat-ai[silero]
|
||||
|
||||
The first time your run your bot with Silero, startup may take a while whilst it downloads and caches the model in the background. You can check the progress of this in the console.
|
||||
|
||||
|
||||
## Hacking on the framework itself
|
||||
|
||||
_Note that you may need to set up a virtual environment before following the instructions below. For instance, you might need to run the following from the root of the repo:_
|
||||
@@ -146,20 +142,20 @@ source venv/bin/activate
|
||||
From the root of this repo, run the following:
|
||||
|
||||
```shell
|
||||
pip install -r dev-requirements.txt -r {env}-requirements.txt
|
||||
pip install -r dev-requirements.txt
|
||||
python -m build
|
||||
```
|
||||
|
||||
This builds the package. To use the package locally (eg to run sample files), run
|
||||
This builds the package. To use the package locally (e.g. to run sample files), run
|
||||
|
||||
```shell
|
||||
pip install --editable .
|
||||
pip install --editable ".[option,...]"
|
||||
```
|
||||
|
||||
If you want to use this package from another directory, you can run:
|
||||
|
||||
```shell
|
||||
pip install path_to_this_repo
|
||||
pip install "path_to_this_repo[option,...]"
|
||||
```
|
||||
|
||||
### Running tests
|
||||
@@ -167,27 +163,29 @@ pip install path_to_this_repo
|
||||
From the root directory, run:
|
||||
|
||||
```shell
|
||||
pytest --doctest-modules --ignore-glob="*to_be_updated*" src tests
|
||||
pytest --doctest-modules --ignore-glob="*to_be_updated*" --ignore-glob=*pipeline_source* src tests
|
||||
```
|
||||
|
||||
## Setting up your editor
|
||||
|
||||
This project uses strict [PEP 8](https://peps.python.org/pep-0008/) formatting.
|
||||
This project uses strict [PEP 8](https://peps.python.org/pep-0008/) formatting via [Ruff](https://github.com/astral-sh/ruff).
|
||||
|
||||
### Emacs
|
||||
|
||||
You can use [use-package](https://github.com/jwiegley/use-package) to install [py-autopep8](https://codeberg.org/ideasman42/emacs-py-autopep8) package and configure `autopep8` arguments:
|
||||
You can use [use-package](https://github.com/jwiegley/use-package) to install [emacs-lazy-ruff](https://github.com/christophermadsen/emacs-lazy-ruff) package and configure `ruff` arguments:
|
||||
|
||||
```elisp
|
||||
(use-package py-autopep8
|
||||
(use-package lazy-ruff
|
||||
:ensure t
|
||||
:defer t
|
||||
:hook ((python-mode . py-autopep8-mode))
|
||||
:hook ((python-mode . lazy-ruff-mode))
|
||||
:config
|
||||
(setq py-autopep8-options '("-a" "-a", "--max-line-length=100")))
|
||||
(setq lazy-ruff-format-command "ruff format --config line-length=100")
|
||||
(setq lazy-ruff-only-format-block t)
|
||||
(setq lazy-ruff-only-format-region t)
|
||||
(setq lazy-ruff-only-format-buffer t))
|
||||
```
|
||||
|
||||
`autopep8` was installed in the `venv` environment described before, so you should be able to use [pyvenv-auto](https://github.com/ryotaro612/pyvenv-auto) to automatically load that environment inside Emacs.
|
||||
`ruff` was installed in the `venv` environment described before, so you should be able to use [pyvenv-auto](https://github.com/ryotaro612/pyvenv-auto) to automatically load that environment inside Emacs.
|
||||
|
||||
```elisp
|
||||
(use-package pyvenv-auto
|
||||
@@ -200,18 +198,14 @@ You can use [use-package](https://github.com/jwiegley/use-package) to install [p
|
||||
### Visual Studio Code
|
||||
|
||||
Install the
|
||||
[autopep8](https://marketplace.visualstudio.com/items?itemName=ms-python.autopep8) extension. Then edit the user settings (_Ctrl-Shift-P_ `Open User Settings (JSON)`) and set it as the default Python formatter, enable formatting on save and configure `autopep8` arguments:
|
||||
[Ruff](https://marketplace.visualstudio.com/items?itemName=charliermarsh.ruff) extension. Then edit the user settings (_Ctrl-Shift-P_ `Open User Settings (JSON)`) and set it as the default Python formatter, enable formatting on save and configure `ruff` arguments:
|
||||
|
||||
```json
|
||||
"[python]": {
|
||||
"editor.defaultFormatter": "ms-python.autopep8",
|
||||
"editor.defaultFormatter": "charliermarsh.ruff",
|
||||
"editor.formatOnSave": true
|
||||
},
|
||||
"autopep8.args": [
|
||||
"-a",
|
||||
"-a",
|
||||
"--max-line-length=100"
|
||||
],
|
||||
"ruff.format.args": ["--config", "line-length=100"]
|
||||
```
|
||||
|
||||
## Getting help
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
autopep8~=2.1.0
|
||||
build~=1.2.1
|
||||
grpcio-tools~=1.62.2
|
||||
pip-tools~=7.4.1
|
||||
pytest~=8.2.0
|
||||
setuptools~=69.5.1
|
||||
pyright~=1.1.376
|
||||
pytest~=8.3.2
|
||||
ruff~=0.6.7
|
||||
setuptools~=72.2.0
|
||||
setuptools_scm~=8.1.0
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
# Anthropic
|
||||
ANTHROPIC_API_KEY=...
|
||||
|
||||
# AWS
|
||||
AWS_SECRET_ACCESS_KEY=...
|
||||
AWS_ACCESS_KEY_ID=...
|
||||
AWS_REGION=...
|
||||
|
||||
# Azure
|
||||
AZURE_SPEECH_REGION=...
|
||||
AZURE_SPEECH_API_KEY=...
|
||||
@@ -27,9 +32,19 @@ FAL_KEY=...
|
||||
# Fireworks
|
||||
FIREWORKS_API_KEY=...
|
||||
|
||||
# Gladia
|
||||
GLADIA_API_KEY=...
|
||||
|
||||
# LMNT
|
||||
LMNT_API_KEY=...
|
||||
LMNT_VOICE_ID=...
|
||||
|
||||
# PlayHT
|
||||
PLAY_HT_USER_ID=...
|
||||
PLAY_HT_API_KEY=...
|
||||
|
||||
# OpenAI
|
||||
OPENAI_API_KEY=...
|
||||
|
||||
#OpenPipe
|
||||
OPENPIPE_API_KEY=...
|
||||
|
||||
@@ -32,13 +32,16 @@ Next, follow the steps in the README for each demo.
|
||||
|
||||
## Projects:
|
||||
|
||||
| Project | Description | Services |
|
||||
| -------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------- |
|
||||
| [Simple Chatbot](simple-chatbot) | Basic voice-driven conversational bot. A good starting point for learning the flow of the framework. | Deepgram, OpenAI, Daily, Daily Prebuilt UI |
|
||||
| [Storytelling Chatbot](storytelling-chatbot) | Stitches together multiple third-party services to create a collaborative storytime experience. | Deepgram, ElevenLabs, Open AI, Fal, Daily, Custom UI |
|
||||
| [Translation Chatbot](translation-chatbot) | Listens for user speech, then translates that speech to Spanish and speaks the translation back. Demonstrates multi-participant use-cases. | Deepgram, Azure, OpenAI, Daily, Daily Prebuilt UI |
|
||||
| [Moondream Chatbot](moondream-chatbot) | Demonstrates how to add vision capabilities to GPT4. **Note: works best with a GPU** | Deepgram, OpenAI, Moondream, Daily, Daily Prebuilt UI |
|
||||
| Function-calling Chatbot (TBC) | A chatbot that can call functions in response to user input | Deepgram, OpenAI, Fireworks, Daily, Daily Prebuilt UI |
|
||||
| Project | Description | Services |
|
||||
|----------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------|
|
||||
| [Simple Chatbot](simple-chatbot) | Basic voice-driven conversational bot. A good starting point for learning the flow of the framework. | Deepgram, ElevenLabs, OpenAI, Daily, Daily Prebuilt UI |
|
||||
| [Storytelling Chatbot](storytelling-chatbot) | Stitches together multiple third-party services to create a collaborative storytime experience. | Deepgram, ElevenLabs, OpenAI, Fal, Daily, Custom UI |
|
||||
| [Translation Chatbot](translation-chatbot) | Listens for user speech, then translates that speech to Spanish and speaks the translation back. Demonstrates multi-participant use-cases. | Deepgram, Azure, OpenAI, Daily, Daily Prebuilt UI |
|
||||
| [Moondream Chatbot](moondream-chatbot) | Demonstrates how to add vision capabilities to GPT4. **Note: works best with a GPU** | Deepgram, ElevenLabs, OpenAI, Moondream, Daily, Daily Prebuilt UI |
|
||||
| [Patient intake](patient-intake) | A chatbot that can call functions in response to user input. | Deepgram, ElevenLabs, OpenAI, Daily, Daily Prebuilt UI |
|
||||
| [Dialin Chatbot](dialin-chatbot) | A chatbot that connects to an incoming phone call from Daily or Twilio. | Deepgram, ElevenLabs, OpenAI, Daily, Twilio |
|
||||
| [Twilio Chatbot](twilio-chatbot) | A chatbot that connects to an incoming phone call from Twilio. | Deepgram, ElevenLabs, OpenAI, Daily, Twilio |
|
||||
| [studypal](studypal) | A chatbot to have a conversation about any article on the web | |
|
||||
|
||||
> [!IMPORTANT]
|
||||
> These example projects use Daily as a WebRTC transport and can be joined using their hosted Prebuilt UI.
|
||||
|
||||
13
examples/deployment/flyio-example/Dockerfile
Normal file
13
examples/deployment/flyio-example/Dockerfile
Normal file
@@ -0,0 +1,13 @@
|
||||
FROM python:3.11-bullseye
|
||||
|
||||
# Open port 7860 for http service
|
||||
ENV FAST_API_PORT=7860
|
||||
EXPOSE 7860
|
||||
|
||||
# Install Python dependencies
|
||||
COPY *.py .
|
||||
COPY ./requirements.txt requirements.txt
|
||||
RUN pip3 install --no-cache-dir --upgrade -r requirements.txt
|
||||
|
||||
# Start the FastAPI server
|
||||
CMD python3 bot_runner.py --port ${FAST_API_PORT}
|
||||
39
examples/deployment/flyio-example/README.md
Normal file
39
examples/deployment/flyio-example/README.md
Normal file
@@ -0,0 +1,39 @@
|
||||
# Fly.io deployment example
|
||||
|
||||
This project modifies the `bot_runner.py` server to launch a new machine for each user session. This is a recommended approach for production vs. running shell processess as your deployment will quickly run out of system resources under load.
|
||||
|
||||
For this example, we are using Daily as a WebRTC transport and provisioning a new room and token for each session. You can use another transport, such as WebSockets, by modifying the `bot.py` and `bot_runner.py` files accordingly.
|
||||
|
||||
## Setting up your fly.io deployment
|
||||
|
||||
### Create your fly.toml file
|
||||
|
||||
You can copy the `example-fly.toml` as a reference. Be sure to change the app name to something unique.
|
||||
|
||||
### Create your .env file
|
||||
|
||||
Copy the base `env.example` to `.env` and enter the necessary API keys.
|
||||
|
||||
`FLY_APP_NAME` should match that in the `fly.toml` file.
|
||||
|
||||
### Launch a new fly.io project
|
||||
|
||||
`fly launch` or `fly launch --org your-org-name`
|
||||
|
||||
### Set the necessary app secrets from your .env
|
||||
|
||||
Note: you can do this manually via the fly.io dashboard under the "secrets" sub-section of your deployment (e.g. "https://fly.io/apps/fly-app-name/secrets") or run the following terminal command:
|
||||
|
||||
`cat .env | tr '\n' ' ' | xargs flyctl secrets set`
|
||||
|
||||
### Deploy your machine
|
||||
|
||||
`fly deploy`
|
||||
|
||||
## Connecting to your bot
|
||||
|
||||
Send a post request to your running fly.io instance:
|
||||
|
||||
`curl --location --request POST 'https://YOUR_FLY_APP_NAME/start_bot'`
|
||||
|
||||
This request will wait until the machine enters into a `starting` state, before returning the a room URL and token to join.
|
||||
104
examples/deployment/flyio-example/bot.py
Normal file
104
examples/deployment/flyio-example/bot.py
Normal file
@@ -0,0 +1,104 @@
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.frames.frames import LLMMessagesFrame, EndFrame
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
daily_api_key = os.getenv("DAILY_API_KEY", "")
|
||||
daily_api_url = os.getenv("DAILY_API_URL", "https://api.daily.co/v1")
|
||||
|
||||
|
||||
async def main(room_url: str, token: str):
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
DailyParams(
|
||||
api_url=daily_api_url,
|
||||
api_key=daily_api_key,
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are Chatbot, a friendly, helpful robot. Your output will be converted to audio so don't include special characters other than '!' or '?' in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by saying hello.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
tma_in,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
@transport.event_handler("on_call_state_updated")
|
||||
async def on_call_state_updated(transport, state):
|
||||
if state == "left":
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Pipecat Bot")
|
||||
parser.add_argument("-u", type=str, help="Room URL")
|
||||
parser.add_argument("-t", type=str, help="Token")
|
||||
config = parser.parse_args()
|
||||
|
||||
asyncio.run(main(config.u, config.t))
|
||||
211
examples/deployment/flyio-example/bot_runner.py
Normal file
211
examples/deployment/flyio-example/bot_runner.py
Normal file
@@ -0,0 +1,211 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import subprocess
|
||||
import os
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import (
|
||||
DailyRESTHelper,
|
||||
DailyRoomObject,
|
||||
DailyRoomProperties,
|
||||
DailyRoomParams,
|
||||
)
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# ------------ Configuration ------------ #
|
||||
|
||||
MAX_SESSION_TIME = 5 * 60 # 5 minutes
|
||||
REQUIRED_ENV_VARS = [
|
||||
"DAILY_API_KEY",
|
||||
"OPENAI_API_KEY",
|
||||
"ELEVENLABS_API_KEY",
|
||||
"ELEVENLABS_VOICE_ID",
|
||||
"FLY_API_KEY",
|
||||
"FLY_APP_NAME",
|
||||
]
|
||||
|
||||
FLY_API_HOST = os.getenv("FLY_API_HOST", "https://api.machines.dev/v1")
|
||||
FLY_APP_NAME = os.getenv("FLY_APP_NAME", "pipecat-fly-example")
|
||||
FLY_API_KEY = os.getenv("FLY_API_KEY", "")
|
||||
FLY_HEADERS = {"Authorization": f"Bearer {FLY_API_KEY}", "Content-Type": "application/json"}
|
||||
|
||||
daily_helpers = {}
|
||||
|
||||
|
||||
# ----------------- API ----------------- #
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
aiohttp_session = aiohttp.ClientSession()
|
||||
daily_helpers["rest"] = DailyRESTHelper(
|
||||
daily_api_key=os.getenv("DAILY_API_KEY", ""),
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
yield
|
||||
await aiohttp_session.close()
|
||||
|
||||
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# ----------------- Main ----------------- #
|
||||
|
||||
|
||||
async def spawn_fly_machine(room_url: str, token: str):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
# Use the same image as the bot runner
|
||||
async with session.get(
|
||||
f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines", headers=FLY_HEADERS
|
||||
) as r:
|
||||
if r.status != 200:
|
||||
text = await r.text()
|
||||
raise Exception(f"Unable to get machine info from Fly: {text}")
|
||||
|
||||
data = await r.json()
|
||||
image = data[0]["config"]["image"]
|
||||
|
||||
# Machine configuration
|
||||
cmd = f"python3 bot.py -u {room_url} -t {token}"
|
||||
cmd = cmd.split()
|
||||
worker_props = {
|
||||
"config": {
|
||||
"image": image,
|
||||
"auto_destroy": True,
|
||||
"init": {"cmd": cmd},
|
||||
"restart": {"policy": "no"},
|
||||
"guest": {"cpu_kind": "shared", "cpus": 1, "memory_mb": 1024},
|
||||
},
|
||||
}
|
||||
|
||||
# Spawn a new machine instance
|
||||
async with session.post(
|
||||
f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines", headers=FLY_HEADERS, json=worker_props
|
||||
) as r:
|
||||
if r.status != 200:
|
||||
text = await r.text()
|
||||
raise Exception(f"Problem starting a bot worker: {text}")
|
||||
|
||||
data = await r.json()
|
||||
# Wait for the machine to enter the started state
|
||||
vm_id = data["id"]
|
||||
|
||||
async with session.get(
|
||||
f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines/{vm_id}/wait?state=started",
|
||||
headers=FLY_HEADERS,
|
||||
) as r:
|
||||
if r.status != 200:
|
||||
text = await r.text()
|
||||
raise Exception(f"Bot was unable to enter started state: {text}")
|
||||
|
||||
print(f"Machine joined room: {room_url}")
|
||||
|
||||
|
||||
@app.post("/start_bot")
|
||||
async def start_bot(request: Request) -> JSONResponse:
|
||||
try:
|
||||
data = await request.json()
|
||||
# Is this a webhook creation request?
|
||||
if "test" in data:
|
||||
return JSONResponse({"test": True})
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
# Use specified room URL, or create a new one if not specified
|
||||
room_url = os.getenv("DAILY_SAMPLE_ROOM_URL", "")
|
||||
|
||||
if not room_url:
|
||||
params = DailyRoomParams(properties=DailyRoomProperties())
|
||||
try:
|
||||
room: DailyRoomObject = await daily_helpers["rest"].create_room(params=params)
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Unable to provision room {e}")
|
||||
else:
|
||||
# Check passed room URL exists, we should assume that it already has a sip set up
|
||||
try:
|
||||
room: DailyRoomObject = await daily_helpers["rest"].get_room_from_url(room_url)
|
||||
except Exception:
|
||||
raise HTTPException(status_code=500, detail=f"Room not found: {room_url}")
|
||||
|
||||
# Give the agent a token to join the session
|
||||
token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)
|
||||
|
||||
if not room or not token:
|
||||
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room_url}")
|
||||
|
||||
# Launch a new fly.io machine, or run as a shell process (not recommended)
|
||||
run_as_process = os.getenv("RUN_AS_PROCESS", False)
|
||||
|
||||
if run_as_process:
|
||||
try:
|
||||
subprocess.Popen(
|
||||
[f"python3 -m bot -u {room.url} -t {token}"],
|
||||
shell=True,
|
||||
bufsize=1,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__)),
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
else:
|
||||
try:
|
||||
await spawn_fly_machine(room.url, token)
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Failed to spawn VM: {e}")
|
||||
|
||||
# Grab a token for the user to join with
|
||||
user_token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)
|
||||
|
||||
return JSONResponse(
|
||||
{
|
||||
"room_url": room.url,
|
||||
"token": user_token,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Check environment variables
|
||||
for env_var in REQUIRED_ENV_VARS:
|
||||
if env_var not in os.environ:
|
||||
raise Exception(f"Missing environment variable: {env_var}.")
|
||||
|
||||
parser = argparse.ArgumentParser(description="Pipecat Bot Runner")
|
||||
parser.add_argument(
|
||||
"--host", type=str, default=os.getenv("HOST", "0.0.0.0"), help="Host address"
|
||||
)
|
||||
parser.add_argument("--port", type=int, default=os.getenv("PORT", 7860), help="Port number")
|
||||
parser.add_argument(
|
||||
"--reload", action="store_true", default=False, help="Reload code on change"
|
||||
)
|
||||
|
||||
config = parser.parse_args()
|
||||
|
||||
try:
|
||||
import uvicorn
|
||||
|
||||
uvicorn.run("bot_runner:app", host=config.host, port=config.port, reload=config.reload)
|
||||
except KeyboardInterrupt:
|
||||
print("Pipecat runner shutting down...")
|
||||
8
examples/deployment/flyio-example/env.example
Normal file
8
examples/deployment/flyio-example/env.example
Normal file
@@ -0,0 +1,8 @@
|
||||
DAILY_API_KEY=
|
||||
DAILY_SAMPLE_ROOM_URL= # Enter a Daily room URL to use a set room URL each time (useful for local testing)
|
||||
OPENAI_API_KEY=
|
||||
ELEVENLABS_API_KEY=
|
||||
ELEVENLABS_VOICE_ID=
|
||||
FLY_API_KEY=
|
||||
FLY_APP_NAME=
|
||||
RUN_AS_PROCESS= # Spawn fly.io machine for each session or run as local process
|
||||
25
examples/deployment/flyio-example/example-fly.toml
Normal file
25
examples/deployment/flyio-example/example-fly.toml
Normal file
@@ -0,0 +1,25 @@
|
||||
# fly.toml app configuration file generated for pipecat-fly-example on 2024-07-01T15:04:53+01:00
|
||||
#
|
||||
# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
|
||||
#
|
||||
|
||||
app = 'pipecat-fly-example'
|
||||
primary_region = 'sjc'
|
||||
|
||||
[build]
|
||||
|
||||
[env]
|
||||
FLY_APP_NAME = 'pipecat-fly-example'
|
||||
|
||||
[http_service]
|
||||
internal_port = 7860
|
||||
force_https = true
|
||||
auto_stop_machines = true
|
||||
auto_start_machines = true
|
||||
min_machines_running = 0
|
||||
processes = ['app']
|
||||
|
||||
[[vm]]
|
||||
memory = 512
|
||||
cpu_kind = 'shared'
|
||||
cpus = 1
|
||||
5
examples/deployment/flyio-example/requirements.txt
Normal file
5
examples/deployment/flyio-example/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
pipecat-ai[daily,openai,silero]
|
||||
fastapi
|
||||
uvicorn
|
||||
python-dotenv
|
||||
loguru
|
||||
3
examples/dialin-chatbot/.dockerignore
Normal file
3
examples/dialin-chatbot/.dockerignore
Normal file
@@ -0,0 +1,3 @@
|
||||
**/.DS_Store
|
||||
.env
|
||||
.env.*
|
||||
165
examples/dialin-chatbot/.gitignore
vendored
Normal file
165
examples/dialin-chatbot/.gitignore
vendored
Normal file
@@ -0,0 +1,165 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/#use-with-ide
|
||||
.pdm.toml
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
runpod.toml
|
||||
|
||||
# custom script to recursively upgrade items in requirements.py
|
||||
upgrade_requirements.py
|
||||
.DS_Store
|
||||
40
examples/dialin-chatbot/Dockerfile
Normal file
40
examples/dialin-chatbot/Dockerfile
Normal file
@@ -0,0 +1,40 @@
|
||||
FROM python:3.11-bullseye
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
ARG USE_PERSISTENT_DATA
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
# Expose FastAPI port
|
||||
ENV FAST_API_PORT=7860
|
||||
EXPOSE 7860
|
||||
|
||||
# Install system dependencies
|
||||
RUN apt-get update && apt-get install --no-install-recommends -y \
|
||||
build-essential \
|
||||
git \
|
||||
ffmpeg \
|
||||
google-perftools \
|
||||
ca-certificates curl gnupg \
|
||||
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Set up a new user named "user" with user ID 1000
|
||||
RUN useradd -m -u 1000 user
|
||||
|
||||
# Set home to the user's home directory
|
||||
ENV HOME=/home/user \
|
||||
PATH=/home/user/.local/bin:$PATH \
|
||||
PYTHONPATH=$HOME/app \
|
||||
PYTHONUNBUFFERED=1
|
||||
|
||||
# Switch to the "user" user
|
||||
USER user
|
||||
|
||||
# Set the working directory to the user's home directory
|
||||
WORKDIR $HOME/app
|
||||
|
||||
# Install Python dependencies
|
||||
COPY *.py .
|
||||
COPY ./requirements.txt requirements.txt
|
||||
RUN pip3 install --no-cache-dir --upgrade -r requirements.txt
|
||||
|
||||
# Start the FastAPI server
|
||||
CMD python3 bot_runner.py --host "0.0.0.0" --port ${FAST_API_PORT}
|
||||
85
examples/dialin-chatbot/README.md
Normal file
85
examples/dialin-chatbot/README.md
Normal file
@@ -0,0 +1,85 @@
|
||||
<div align="center">
|
||||
<img alt="pipecat" width="300px" height="auto" src="image.png">
|
||||
</div>
|
||||
|
||||
# Dialin example
|
||||
|
||||
Example project that demonstrates how to add phone number dialin to your Pipecat bots. We include examples for both Daily (`bot_daily.py`) and Twilio (`bot_twilio.py`), depending on who you want to use as a phone vendor.
|
||||
|
||||
- 🔁 Transport: Daily WebRTC
|
||||
- 💬 Speech-to-Text: Deepgram via Daily transport
|
||||
- 🤖 LLM: GPT4-o / OpenAI
|
||||
- 🔉 Text-to-Speech: ElevenLabs
|
||||
|
||||
#### Should I use Daily or Twilio as a vendor?
|
||||
|
||||
If you're starting from scratch, using Daily to provision phone numbers alongside Daily as a transport offers some convenience (such as automatic call forwarding.)
|
||||
|
||||
If you already have Twilio numbers and workflows that you want to connect to your Pipecat bots, there is some additional configuration required (you'll need to create a `on_dialin_ready` and use the Twilio client to trigger the forward.)
|
||||
|
||||
You can read more about this, as well as see respective walkthroughs in our docs.
|
||||
|
||||
## Setup
|
||||
|
||||
```shell
|
||||
# Install the requirements
|
||||
pip install -r requirements.txt
|
||||
|
||||
# Setup your env
|
||||
mv env.example .env
|
||||
```
|
||||
|
||||
## Using Daily numbers
|
||||
|
||||
Run `bot_runner.py` to handle incoming HTTP requests:
|
||||
|
||||
`python bot_runner.py --host localhost`
|
||||
|
||||
Then target the following URL:
|
||||
|
||||
`POST /daily_start_bot`
|
||||
|
||||
For more configuration options, please consult Daily's API documentation.
|
||||
|
||||
|
||||
## Using Twilio numbers
|
||||
|
||||
As above, but target the following URL:
|
||||
|
||||
`POST /twilio_start_bot`
|
||||
|
||||
For more configuration options, please consult Twilio's API documentation.
|
||||
|
||||
## Deployment example
|
||||
|
||||
A Dockerfile is included in this demo for convenience. Here is an example of how to build and deploy your bot to [fly.io](https://fly.io).
|
||||
|
||||
*Please note: This demo spawns agents as subprocesses for convenience / demonstration purposes. You would likely not want to do this in production as it would limit concurrency to available system resources. For more information on how to deploy your bots using VMs, refer to the Pipecat documentation.*
|
||||
|
||||
### Build the docker image
|
||||
|
||||
`docker build -t tag:project .`
|
||||
|
||||
### Launch the fly project
|
||||
|
||||
`mv fly.example.toml fly.toml`
|
||||
|
||||
`fly launch` (using the included fly.toml)
|
||||
|
||||
### Setup your secrets on Fly
|
||||
|
||||
Set the necessary secrets (found in `env.example`)
|
||||
|
||||
`fly secrets set DAILY_API_KEY=... OPENAI_API_KEY=... ELEVENLABS_API_KEY=... ELEVENLABS_VOICE_ID=...`
|
||||
|
||||
If you're using Twilio as a number vendor:
|
||||
|
||||
`fly secrets set TWILIO_ACCOUNT_SID=... TWILIO_AUTH_TOKEN=...`
|
||||
|
||||
### Deploy!
|
||||
|
||||
`fly deploy`
|
||||
|
||||
## Need to do something more advanced?
|
||||
|
||||
This demo covers the basics of bot telephony. If you want to know more about working with PSTN / SIP, please ping us on [Discord](https://discord.gg/pipecat).
|
||||
106
examples/dialin-chatbot/bot_daily.py
Normal file
106
examples/dialin-chatbot/bot_daily.py
Normal file
@@ -0,0 +1,106 @@
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.frames.frames import LLMMessagesFrame, EndFrame
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport, DailyDialinSettings
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
daily_api_key = os.getenv("DAILY_API_KEY", "")
|
||||
daily_api_url = os.getenv("DAILY_API_URL", "https://api.daily.co/v1")
|
||||
|
||||
|
||||
async def main(room_url: str, token: str, callId: str, callDomain: str):
|
||||
# diallin_settings are only needed if Daily's SIP URI is used
|
||||
# If you are handling this via Twilio, Telnyx, set this to None
|
||||
# and handle call-forwarding when on_dialin_ready fires.
|
||||
diallin_settings = DailyDialinSettings(call_id=callId, call_domain=callDomain)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
DailyParams(
|
||||
api_url=daily_api_url,
|
||||
api_key=daily_api_key,
|
||||
dialin_settings=diallin_settings,
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by saying 'Oh, hello! Who dares dial me at this hour?!'.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
tma_in,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Pipecat Simple ChatBot")
|
||||
parser.add_argument("-u", type=str, help="Room URL")
|
||||
parser.add_argument("-t", type=str, help="Token")
|
||||
parser.add_argument("-i", type=str, help="Call ID")
|
||||
parser.add_argument("-d", type=str, help="Call Domain")
|
||||
config = parser.parse_args()
|
||||
|
||||
asyncio.run(main(config.u, config.t, config.i, config.d))
|
||||
220
examples/dialin-chatbot/bot_runner.py
Normal file
220
examples/dialin-chatbot/bot_runner.py
Normal file
@@ -0,0 +1,220 @@
|
||||
"""
|
||||
bot_runner.py
|
||||
|
||||
HTTP service that listens for incoming calls from either Daily or Twilio,
|
||||
provisioning a room and starting a Pipecat bot in response.
|
||||
|
||||
Refer to README for more information.
|
||||
"""
|
||||
|
||||
import aiohttp
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse, PlainTextResponse
|
||||
from twilio.twiml.voice_response import VoiceResponse
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import (
|
||||
DailyRESTHelper,
|
||||
DailyRoomObject,
|
||||
DailyRoomProperties,
|
||||
DailyRoomSipParams,
|
||||
DailyRoomParams,
|
||||
)
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# ------------ Configuration ------------ #
|
||||
|
||||
MAX_SESSION_TIME = 5 * 60 # 5 minutes
|
||||
REQUIRED_ENV_VARS = ["OPENAI_API_KEY", "DAILY_API_KEY", "ELEVENLABS_API_KEY", "ELEVENLABS_VOICE_ID"]
|
||||
|
||||
daily_helpers = {}
|
||||
|
||||
# ----------------- API ----------------- #
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
aiohttp_session = aiohttp.ClientSession()
|
||||
daily_helpers["rest"] = DailyRESTHelper(
|
||||
daily_api_key=os.getenv("DAILY_API_KEY", ""),
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
yield
|
||||
await aiohttp_session.close()
|
||||
|
||||
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
"""
|
||||
Create Daily room, tell the bot if the room is created for Twilio's SIP or Daily's SIP (vendor).
|
||||
When the vendor is Daily, the bot handles the call forwarding automatically,
|
||||
i.e, forwards the call from the "hold music state" to the Daily Room's SIP URI.
|
||||
|
||||
Alternatively, when the vendor is Twilio (not Daily), the bot is responsible for
|
||||
updating the state on Twilio. So when `dialin-ready` fires, it takes appropriate
|
||||
action using the Twilio Client library.
|
||||
"""
|
||||
|
||||
|
||||
async def _create_daily_room(room_url, callId, callDomain=None, vendor="daily"):
|
||||
if not room_url:
|
||||
params = DailyRoomParams(
|
||||
properties=DailyRoomProperties(
|
||||
# Note: these are the default values, except for the display name
|
||||
sip=DailyRoomSipParams(
|
||||
display_name="dialin-user", video=False, sip_mode="dial-in", num_endpoints=1
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
print(f"Creating new room...")
|
||||
room: DailyRoomObject = await daily_helpers["rest"].create_room(params=params)
|
||||
|
||||
else:
|
||||
# Check passed room URL exist (we assume that it already has a sip set up!)
|
||||
try:
|
||||
print(f"Joining existing room: {room_url}")
|
||||
room: DailyRoomObject = await daily_helpers["rest"].get_room_from_url(room_url)
|
||||
except Exception:
|
||||
raise HTTPException(status_code=500, detail=f"Room not found: {room_url}")
|
||||
|
||||
print(f"Daily room: {room.url} {room.config.sip_endpoint}")
|
||||
|
||||
# Give the agent a token to join the session
|
||||
token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)
|
||||
|
||||
if not room or not token:
|
||||
raise HTTPException(status_code=500, detail=f"Failed to get room or token token")
|
||||
|
||||
# Spawn a new agent, and join the user session
|
||||
# Note: this is mostly for demonstration purposes (refer to 'deployment' in docs)
|
||||
if vendor == "daily":
|
||||
bot_proc = f"python3 - m bot_daily - u {room.url} - t {token} - i {
|
||||
callId} - d {callDomain}"
|
||||
else:
|
||||
bot_proc = f"python3 - m bot_twilio - u {room.url} - t {
|
||||
token} - i {callId} - s {room.config.sip_endpoint}"
|
||||
|
||||
try:
|
||||
subprocess.Popen(
|
||||
[bot_proc], shell=True, bufsize=1, cwd=os.path.dirname(os.path.abspath(__file__))
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
|
||||
return room
|
||||
|
||||
|
||||
@app.post("/twilio_start_bot", response_class=PlainTextResponse)
|
||||
async def twilio_start_bot(request: Request):
|
||||
print(f"POST /twilio_voice_bot")
|
||||
|
||||
# twilio_start_bot is invoked directly by Twilio (as a web hook).
|
||||
# On Twilio, under Active Numbers, pick the phone number
|
||||
# Click Configure and under Voice Configuration,
|
||||
# "a call comes in" choose webhook and point the URL to
|
||||
# where this code is hosted.
|
||||
data = {}
|
||||
try:
|
||||
# shouldnt have received json, twilio sends form data
|
||||
form_data = await request.form()
|
||||
data = dict(form_data)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
room_url = os.getenv("DAILY_SAMPLE_ROOM_URL", None)
|
||||
callId = data.get("CallSid")
|
||||
|
||||
if not callId:
|
||||
raise HTTPException(status_code=500, detail="Missing 'CallSid' in request")
|
||||
|
||||
print("CallId: %s" % callId)
|
||||
|
||||
# create room and tell the bot to join the created room
|
||||
# note: Twilio does not require a callDomain
|
||||
room: DailyRoomObject = await _create_daily_room(room_url, callId, None, "twilio")
|
||||
|
||||
print(f"Put Twilio on hold...")
|
||||
# We have the room and the SIP URI,
|
||||
# but we do not know if the Daily SIP Worker and the Bot have joined the call
|
||||
# put the call on hold until the 'on_dialin_ready' fires.
|
||||
# Then, the bot will update the called sid with the sip uri.
|
||||
# http://com.twilio.music.classical.s3.amazonaws.com/BusyStrings.mp3
|
||||
resp = VoiceResponse()
|
||||
resp.play(
|
||||
url="http://com.twilio.sounds.music.s3.amazonaws.com/MARKOVICHAMP-Borghestral.mp3", loop=10
|
||||
)
|
||||
return str(resp)
|
||||
|
||||
|
||||
@app.post("/daily_start_bot")
|
||||
async def daily_start_bot(request: Request) -> JSONResponse:
|
||||
# The /daily_start_bot is invoked when a call is received on Daily's SIP URI
|
||||
# daily_start_bot will create the room, put the call on hold until
|
||||
# the bot and sip worker are ready. Daily will automatically
|
||||
# forward the call to the SIP URi when dialin_ready fires.
|
||||
|
||||
# Use specified room URL, or create a new one if not specified
|
||||
room_url = os.getenv("DAILY_SAMPLE_ROOM_URL", None)
|
||||
# Get the dial-in properties from the request
|
||||
try:
|
||||
data = await request.json()
|
||||
if "test" in data:
|
||||
# Pass through any webhook checks
|
||||
return JSONResponse({"test": True})
|
||||
callId = data.get("callId", None)
|
||||
callDomain = data.get("callDomain", None)
|
||||
except Exception:
|
||||
raise HTTPException(status_code=500, detail="Missing properties 'callId' or 'callDomain'")
|
||||
|
||||
print(f"CallId: {callId}, CallDomain: {callDomain}")
|
||||
room: DailyRoomObject = await _create_daily_room(room_url, callId, callDomain, "daily")
|
||||
|
||||
# Grab a token for the user to join with
|
||||
return JSONResponse({"room_url": room.url, "sipUri": room.config.sip_endpoint})
|
||||
|
||||
|
||||
# ----------------- Main ----------------- #
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Check environment variables
|
||||
for env_var in REQUIRED_ENV_VARS:
|
||||
if env_var not in os.environ:
|
||||
raise Exception(f"Missing environment variable: {env_var}.")
|
||||
|
||||
parser = argparse.ArgumentParser(description="Pipecat Bot Runner")
|
||||
parser.add_argument(
|
||||
"--host", type=str, default=os.getenv("HOST", "0.0.0.0"), help="Host address"
|
||||
)
|
||||
parser.add_argument("--port", type=int, default=os.getenv("PORT", 7860), help="Port number")
|
||||
parser.add_argument("--reload", action="store_true", default=True, help="Reload code on change")
|
||||
|
||||
config = parser.parse_args()
|
||||
|
||||
try:
|
||||
import uvicorn
|
||||
|
||||
uvicorn.run("bot_runner:app", host=config.host, port=config.port, reload=config.reload)
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print("Pipecat runner shutting down...")
|
||||
123
examples/dialin-chatbot/bot_twilio.py
Normal file
123
examples/dialin-chatbot/bot_twilio.py
Normal file
@@ -0,0 +1,123 @@
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.frames.frames import LLMMessagesFrame, EndFrame
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from twilio.rest import Client
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
twilio_account_sid = os.getenv("TWILIO_ACCOUNT_SID")
|
||||
twilio_auth_token = os.getenv("TWILIO_AUTH_TOKEN")
|
||||
twilioclient = Client(twilio_account_sid, twilio_auth_token)
|
||||
|
||||
daily_api_key = os.getenv("DAILY_API_KEY", "")
|
||||
|
||||
|
||||
async def main(room_url: str, token: str, callId: str, sipUri: str):
|
||||
# dialin_settings are only needed if Daily's SIP URI is used
|
||||
# If you are handling this via Twilio, Telnyx, set this to None
|
||||
# and handle call-forwarding when on_dialin_ready fires.
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
DailyParams(
|
||||
api_key=daily_api_key,
|
||||
dialin_settings=None, # Not required for Twilio
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by saying 'Hello! Who dares dial me at this hour?!'.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
tma_in,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
@transport.event_handler("on_dialin_ready")
|
||||
async def on_dialin_ready(transport, cdata):
|
||||
# For Twilio, Telnyx, etc. You need to update the state of the call
|
||||
# and forward it to the sip_uri..
|
||||
print(f"Forwarding call: {callId} {sipUri}")
|
||||
|
||||
try:
|
||||
# The TwiML is updated using Twilio's client library
|
||||
call = twilioclient.calls(callId).update(
|
||||
twiml=f"<Response><Dial><Sip>{sipUri}</Sip></Dial></Response>"
|
||||
)
|
||||
except Exception as e:
|
||||
raise Exception(f"Failed to forward call: {str(e)}")
|
||||
|
||||
runner = PipelineRunner()
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Pipecat Simple ChatBot")
|
||||
parser.add_argument("-u", type=str, help="Room URL")
|
||||
parser.add_argument("-t", type=str, help="Token")
|
||||
parser.add_argument("-i", type=str, help="Call ID")
|
||||
parser.add_argument("-s", type=str, help="SIP URI")
|
||||
config = parser.parse_args()
|
||||
|
||||
asyncio.run(main(config.u, config.t, config.i, config.s))
|
||||
8
examples/dialin-chatbot/env.example
Normal file
8
examples/dialin-chatbot/env.example
Normal file
@@ -0,0 +1,8 @@
|
||||
DAILY_SAMPLE_ROOM_URL=https://yourdomain.daily.co/yourroom # (optional: for joining the bot to the same room repeatedly for local dev)
|
||||
DAILY_API_KEY=.
|
||||
DAILY_API_URL=api.daily.co/v1
|
||||
OPENAI_API_KEY=
|
||||
ELEVENLABS_API_KEY=
|
||||
ELEVENLABS_VOICE_ID=
|
||||
TWILIO_ACCOUNT_SID=
|
||||
TWILIO_AUTH_TOKEN=
|
||||
19
examples/dialin-chatbot/fly.example.toml
Normal file
19
examples/dialin-chatbot/fly.example.toml
Normal file
@@ -0,0 +1,19 @@
|
||||
# fly.toml app configuration file generated for pipecat-dialin-demo on 2024-06-03T15:57:57+02:00
|
||||
#
|
||||
# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
|
||||
#
|
||||
|
||||
app = 'pipecat-dialin-demo'
|
||||
primary_region = 'sjc'
|
||||
|
||||
[build]
|
||||
|
||||
[http_service]
|
||||
internal_port = 7860
|
||||
force_https = true
|
||||
auto_stop_machines = true
|
||||
auto_start_machines = true
|
||||
min_machines_running = 1
|
||||
|
||||
[[vm]]
|
||||
size = 'performance-1x'
|
||||
BIN
examples/dialin-chatbot/image.png
Normal file
BIN
examples/dialin-chatbot/image.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 19 KiB |
6
examples/dialin-chatbot/requirements.txt
Normal file
6
examples/dialin-chatbot/requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
pipecat-ai[daily,elevenlabs,openai,silero]
|
||||
fastapi
|
||||
uvicorn
|
||||
python-dotenv
|
||||
twilio
|
||||
python-multipart
|
||||
@@ -13,7 +13,7 @@ from pipecat.frames.frames import EndFrame, TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
@@ -21,21 +21,24 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Say One Thing", DailyParams(audio_out_enabled=True))
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Say One Thing", DailyParams(audio_out_enabled=True)
|
||||
)
|
||||
|
||||
tts = CartesiaHttpTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -46,11 +49,11 @@ async def main(room_url):
|
||||
# participant joins.
|
||||
@transport.event_handler("on_participant_joined")
|
||||
async def on_new_participant_joined(transport, participant):
|
||||
participant_name = participant["info"]["userName"] or ''
|
||||
participant_name = participant["info"]["userName"] or ""
|
||||
await task.queue_frames([TextFrame(f"Hello there, {participant_name}!"), EndFrame()])
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -9,17 +9,18 @@ import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import EndFrame, TextFrame
|
||||
from pipecat.frames.frames import TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.audio import LocalAudioTransport
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -30,10 +31,9 @@ async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = LocalAudioTransport(TransportParams(audio_out_enabled=True))
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
pipeline = Pipeline([tts, transport.output()])
|
||||
@@ -42,7 +42,7 @@ async def main():
|
||||
|
||||
async def say_something():
|
||||
await asyncio.sleep(1)
|
||||
await task.queue_frames([TextFrame("Hello there!"), EndFrame()])
|
||||
await task.queue_frame(TextFrame("Hello there!"))
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
|
||||
108
examples/foundational/01b-livekit-audio.py
Normal file
108
examples/foundational/01b-livekit-audio.py
Normal file
@@ -0,0 +1,108 @@
|
||||
import argparse
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from livekit import api # pip install livekit-api
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.services.livekit import LiveKitParams, LiveKitTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
def generate_token(room_name: str, participant_name: str, api_key: str, api_secret: str) -> str:
|
||||
token = api.AccessToken(api_key, api_secret)
|
||||
token.with_identity(participant_name).with_name(participant_name).with_grants(
|
||||
api.VideoGrants(
|
||||
room_join=True,
|
||||
room=room_name,
|
||||
)
|
||||
)
|
||||
|
||||
return token.to_jwt()
|
||||
|
||||
|
||||
async def configure_livekit():
|
||||
parser = argparse.ArgumentParser(description="LiveKit AI SDK Bot Sample")
|
||||
parser.add_argument(
|
||||
"-r", "--room", type=str, required=False, help="Name of the LiveKit room to join"
|
||||
)
|
||||
parser.add_argument("-u", "--url", type=str, required=False, help="URL of the LiveKit server")
|
||||
|
||||
args, unknown = parser.parse_known_args()
|
||||
|
||||
room_name = args.room or os.getenv("LIVEKIT_ROOM_NAME")
|
||||
url = args.url or os.getenv("LIVEKIT_URL")
|
||||
api_key = os.getenv("LIVEKIT_API_KEY")
|
||||
api_secret = os.getenv("LIVEKIT_API_SECRET")
|
||||
|
||||
if not room_name:
|
||||
raise Exception(
|
||||
"No LiveKit room specified. Use the -r/--room option from the command line, or set LIVEKIT_ROOM_NAME in your environment."
|
||||
)
|
||||
|
||||
if not url:
|
||||
raise Exception(
|
||||
"No LiveKit server URL specified. Use the -u/--url option from the command line, or set LIVEKIT_URL in your environment."
|
||||
)
|
||||
|
||||
if not api_key or not api_secret:
|
||||
raise Exception(
|
||||
"LIVEKIT_API_KEY and LIVEKIT_API_SECRET must be set in environment variables."
|
||||
)
|
||||
|
||||
token = generate_token(room_name, "Say One Thing", api_key, api_secret)
|
||||
|
||||
user_token = generate_token(room_name, "User", api_key, api_secret)
|
||||
logger.info(f"User token: {user_token}")
|
||||
|
||||
return (url, token, room_name)
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(url, token, room_name) = await configure_livekit()
|
||||
|
||||
transport = LiveKitTransport(
|
||||
url=url,
|
||||
token=token,
|
||||
room_name=room_name,
|
||||
params=LiveKitParams(audio_out_enabled=True, audio_out_sample_rate=16000),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(Pipeline([tts, transport.output()]))
|
||||
|
||||
# Register an event handler so we can play the audio when the
|
||||
# participant joins.
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant_id):
|
||||
await asyncio.sleep(1)
|
||||
await task.queue_frame(
|
||||
TextFrame(
|
||||
"Hello there! How are you doing today? Would you like to talk about the weather?"
|
||||
)
|
||||
)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -13,7 +13,7 @@ from pipecat.frames.frames import EndFrame, LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
@@ -22,35 +22,34 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
"Say One Thing From an LLM",
|
||||
DailyParams(audio_out_enabled=True))
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Say One Thing From an LLM", DailyParams(audio_out_enabled=True)
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4-turbo-preview")
|
||||
tts = CartesiaHttpTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are an LLM in a WebRTC session, and this is a 'hello world' demo. Say hello to the world.",
|
||||
}]
|
||||
}
|
||||
]
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
@@ -64,5 +63,4 @@ async def main(room_url):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -21,29 +21,26 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
"Show a still frame image",
|
||||
DailyParams(
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=1024
|
||||
)
|
||||
DailyParams(camera_out_enabled=True, camera_out_width=1024, camera_out_height=1024),
|
||||
)
|
||||
|
||||
imagegen = FalImageGenService(
|
||||
params=FalImageGenService.InputParams(
|
||||
image_size="square_hd"
|
||||
),
|
||||
params=FalImageGenService.InputParams(image_size="square_hd"),
|
||||
aiohttp_session=session,
|
||||
key=os.getenv("FAL_KEY"),
|
||||
)
|
||||
@@ -64,5 +61,4 @@ async def main(room_url):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -22,6 +22,7 @@ from pipecat.transports.local.tk import TkLocalTransport
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -35,15 +36,11 @@ async def main():
|
||||
|
||||
transport = TkLocalTransport(
|
||||
tk_root,
|
||||
TransportParams(
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=1024))
|
||||
TransportParams(camera_out_enabled=True, camera_out_width=1024, camera_out_height=1024),
|
||||
)
|
||||
|
||||
imagegen = FalImageGenService(
|
||||
params=FalImageGenService.InputParams(
|
||||
image_size="square_hd"
|
||||
),
|
||||
params=FalImageGenService.InputParams(image_size="square_hd"),
|
||||
aiohttp_session=session,
|
||||
key=os.getenv("FAL_KEY"),
|
||||
)
|
||||
@@ -56,7 +53,7 @@ async def main():
|
||||
runner = PipelineRunner()
|
||||
|
||||
async def run_tk():
|
||||
while runner.is_active():
|
||||
while not task.has_finished():
|
||||
tk_root.update()
|
||||
tk_root.update_idletasks()
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
@@ -4,6 +4,10 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
#
|
||||
# This example broken on latest pipecat and needs updating.
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
@@ -24,14 +28,17 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(room_url, None, "Static And Dynamic Speech")
|
||||
|
||||
meeting = TransportServiceOutput(transport, mic_enabled=True)
|
||||
@@ -52,8 +59,7 @@ async def main(room_url: str):
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
)
|
||||
|
||||
messages = [{"role": "system",
|
||||
"content": "tell the user a joke about llamas"}]
|
||||
messages = [{"role": "system", "content": "tell the user a joke about llamas"}]
|
||||
|
||||
# Start a task to run the LLM to create a joke, and convert the LLM
|
||||
# output to audio frames. This task will run in parallel with generating
|
||||
@@ -71,8 +77,7 @@ async def main(room_url: str):
|
||||
]
|
||||
)
|
||||
|
||||
merge_pipeline = SequentialMergePipeline(
|
||||
[simple_tts_pipeline, llm_pipeline])
|
||||
merge_pipeline = SequentialMergePipeline([simple_tts_pipeline, llm_pipeline])
|
||||
|
||||
await asyncio.gather(
|
||||
transport.run(merge_pipeline),
|
||||
@@ -82,5 +87,4 @@ async def main(room_url: str):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -14,22 +14,18 @@ from dataclasses import dataclass
|
||||
from pipecat.frames.frames import (
|
||||
AppFrame,
|
||||
Frame,
|
||||
ImageRawFrame,
|
||||
TextFrame,
|
||||
EndFrame,
|
||||
LLMFullResponseStartFrame,
|
||||
LLMMessagesFrame,
|
||||
LLMResponseStartFrame,
|
||||
TextFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.aggregators.gated import GatedAggregator
|
||||
from pipecat.processors.aggregators.llm_response import LLMFullResponseAggregator
|
||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||
from pipecat.processors.aggregators.parallel_task import ParallelTask
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
@@ -38,6 +34,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -59,20 +56,24 @@ class MonthPrepender(FrameProcessor):
|
||||
self.prepend_to_next_text_frame = False
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, MonthFrame):
|
||||
self.most_recent_month = frame.month
|
||||
elif self.prepend_to_next_text_frame and isinstance(frame, TextFrame):
|
||||
await self.push_frame(TextFrame(f"{self.most_recent_month}: {frame.text}"))
|
||||
self.prepend_to_next_text_frame = False
|
||||
elif isinstance(frame, LLMResponseStartFrame):
|
||||
elif isinstance(frame, LLMFullResponseStartFrame):
|
||||
self.prepend_to_next_text_frame = True
|
||||
await self.push_frame(frame)
|
||||
else:
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
@@ -81,48 +82,46 @@ async def main(room_url):
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=1024
|
||||
)
|
||||
camera_out_height=1024,
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
)
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4-turbo-preview")
|
||||
tts = CartesiaHttpTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
imagegen = FalImageGenService(
|
||||
params=FalImageGenService.InputParams(
|
||||
image_size="square_hd"
|
||||
),
|
||||
params=FalImageGenService.InputParams(image_size="square_hd"),
|
||||
aiohttp_session=session,
|
||||
key=os.getenv("FAL_KEY"),
|
||||
)
|
||||
|
||||
gated_aggregator = GatedAggregator(
|
||||
gate_open_fn=lambda frame: isinstance(frame, ImageRawFrame),
|
||||
gate_close_fn=lambda frame: isinstance(frame, LLMResponseStartFrame),
|
||||
start_open=False
|
||||
)
|
||||
|
||||
sentence_aggregator = SentenceAggregator()
|
||||
month_prepender = MonthPrepender()
|
||||
llm_full_response_aggregator = LLMFullResponseAggregator()
|
||||
|
||||
pipeline = Pipeline([
|
||||
llm,
|
||||
sentence_aggregator,
|
||||
ParallelTask(
|
||||
[month_prepender, tts],
|
||||
[llm_full_response_aggregator, imagegen]
|
||||
),
|
||||
gated_aggregator,
|
||||
transport.output()
|
||||
])
|
||||
# With `SyncParallelPipeline` we synchronize audio and images by pushing
|
||||
# them basically in order (e.g. I1 A1 A1 A1 I2 A2 A2 A2 A2 I3 A3). To do
|
||||
# that, each pipeline runs concurrently and `SyncParallelPipeline` will
|
||||
# wait for the input frame to be processed.
|
||||
#
|
||||
# Note that `SyncParallelPipeline` requires the last processor in each
|
||||
# of the pipelines to be synchronous. In this case, we use
|
||||
# `CartesiaHttpTTSService` and `FalImageGenService` which make HTTP
|
||||
# requests and wait for the response.
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
llm, # LLM
|
||||
sentence_aggregator, # Aggregates LLM output into full sentences
|
||||
SyncParallelPipeline( # Run pipelines in parallel aggregating the result
|
||||
[month_prepender, tts], # Create "Month: sentence" and output audio
|
||||
[imagegen], # Generate image
|
||||
),
|
||||
transport.output(), # Transport output
|
||||
]
|
||||
)
|
||||
|
||||
frames = []
|
||||
for month in [
|
||||
@@ -148,8 +147,6 @@ async def main(room_url):
|
||||
frames.append(MonthFrame(month=month))
|
||||
frames.append(LLMMessagesFrame(messages))
|
||||
|
||||
frames.append(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
@@ -160,5 +157,4 @@ async def main(room_url):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -11,18 +11,25 @@ import sys
|
||||
|
||||
import tkinter as tk
|
||||
|
||||
from pipecat.frames.frames import AudioRawFrame, Frame, URLImageRawFrame, LLMMessagesFrame, TextFrame
|
||||
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
OutputAudioRawFrame,
|
||||
TTSAudioRawFrame,
|
||||
URLImageRawFrame,
|
||||
LLMMessagesFrame,
|
||||
TextFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import LLMFullResponseAggregator
|
||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.tk import TkLocalTransport
|
||||
from pipecat.transports.local.tk import TkLocalTransport, TkOutputTransport
|
||||
|
||||
from loguru import logger
|
||||
|
||||
@@ -42,7 +49,12 @@ async def main():
|
||||
runner = PipelineRunner()
|
||||
|
||||
async def get_month_data(month):
|
||||
messages = [{"role": "system", "content": f"Describe a nature photograph suitable for use in a calendar, for the month of {month}. Include only the image description with no preamble. Limit the description to one sentence, please.", }]
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": f"Describe a nature photograph suitable for use in a calendar, for the month of {month}. Include only the image description with no preamble. Limit the description to one sentence, please.",
|
||||
}
|
||||
]
|
||||
|
||||
class ImageDescription(FrameProcessor):
|
||||
def __init__(self):
|
||||
@@ -50,6 +62,8 @@ async def main():
|
||||
self.text = ""
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, TextFrame):
|
||||
self.text = frame.text
|
||||
await self.push_frame(frame, direction)
|
||||
@@ -58,12 +72,16 @@ async def main():
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.audio = bytearray()
|
||||
self.frame = None
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
if isinstance(frame, AudioRawFrame):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, TTSAudioRawFrame):
|
||||
self.audio.extend(frame.audio)
|
||||
self.frame = AudioRawFrame(
|
||||
bytes(self.audio), frame.sample_rate, frame.num_channels)
|
||||
self.frame = OutputAudioRawFrame(
|
||||
bytes(self.audio), frame.sample_rate, frame.num_channels
|
||||
)
|
||||
|
||||
class ImageGrabber(FrameProcessor):
|
||||
def __init__(self):
|
||||
@@ -71,26 +89,25 @@ async def main():
|
||||
self.frame = None
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, URLImageRawFrame):
|
||||
self.frame = frame
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4-turbo-preview")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"))
|
||||
tts = CartesiaHttpTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
imagegen = FalImageGenService(
|
||||
params=FalImageGenService.InputParams(
|
||||
image_size="square_hd"
|
||||
),
|
||||
params=FalImageGenService.InputParams(image_size="square_hd"),
|
||||
aiohttp_session=session,
|
||||
key=os.getenv("FAL_KEY"))
|
||||
key=os.getenv("FAL_KEY"),
|
||||
)
|
||||
|
||||
aggregator = LLMFullResponseAggregator()
|
||||
sentence_aggregator = SentenceAggregator()
|
||||
|
||||
description = ImageDescription()
|
||||
|
||||
@@ -98,9 +115,27 @@ async def main():
|
||||
|
||||
image_grabber = ImageGrabber()
|
||||
|
||||
pipeline = Pipeline([llm, aggregator, description,
|
||||
ParallelPipeline([tts, audio_grabber],
|
||||
[imagegen, image_grabber])])
|
||||
# With `SyncParallelPipeline` we synchronize audio and images by
|
||||
# pushing them basically in order (e.g. I1 A1 A1 A1 I2 A2 A2 A2 A2
|
||||
# I3 A3). To do that, each pipeline runs concurrently and
|
||||
# `SyncParallelPipeline` will wait for the input frame to be
|
||||
# processed.
|
||||
#
|
||||
# Note that `SyncParallelPipeline` requires the last processor in
|
||||
# each of the pipelines to be synchronous. In this case, we use
|
||||
# `CartesiaHttpTTSService` and `FalImageGenService` which make HTTP
|
||||
# requests and wait for the response.
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
llm, # LLM
|
||||
sentence_aggregator, # Aggregates LLM output into full sentences
|
||||
description, # Store sentence
|
||||
SyncParallelPipeline(
|
||||
[tts, audio_grabber], # Generate and store audio for the given sentence
|
||||
[imagegen, image_grabber], # Generate and storeimage for the given sentence
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
await task.queue_frame(LLMMessagesFrame(messages))
|
||||
@@ -121,20 +156,19 @@ async def main():
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=1024))
|
||||
camera_out_height=1024,
|
||||
),
|
||||
)
|
||||
|
||||
pipeline = Pipeline([transport.output()])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
# We only specify 5 months as we create tasks all at once and we might
|
||||
# get rate limited otherwise.
|
||||
# We only specify a few months as we create tasks all at once and we
|
||||
# might get rate limited otherwise.
|
||||
months: list[str] = [
|
||||
"January",
|
||||
"February",
|
||||
# "March",
|
||||
# "April",
|
||||
# "May",
|
||||
]
|
||||
|
||||
# We create one task per month. This will be executed concurrently.
|
||||
@@ -152,7 +186,7 @@ async def main():
|
||||
await runner.stop_when_done()
|
||||
|
||||
async def run_tk():
|
||||
while True:
|
||||
while not task.has_finished():
|
||||
tk_root.update()
|
||||
tk_root.update_idletasks()
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
@@ -9,70 +9,103 @@ import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.frames.frames import Frame, LLMMessagesFrame, MetricsFrame
|
||||
from pipecat.metrics.metrics import (
|
||||
TTFBMetricsData,
|
||||
ProcessingMetricsData,
|
||||
LLMUsageMetricsData,
|
||||
TTSUsageMetricsData,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.logger import FrameLogger
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVAD
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
class MetricsLogger(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
if isinstance(frame, MetricsFrame):
|
||||
for d in frame.data:
|
||||
if isinstance(d, TTFBMetricsData):
|
||||
print(f"!!! MetricsFrame: {frame}, ttfb: {d.value}")
|
||||
elif isinstance(d, ProcessingMetricsData):
|
||||
print(f"!!! MetricsFrame: {frame}, processing: {d.value}")
|
||||
elif isinstance(d, LLMUsageMetricsData):
|
||||
tokens = d.value
|
||||
print(
|
||||
f"!!! MetricsFrame: {frame}, tokens: {
|
||||
tokens.prompt_tokens}, characters: {
|
||||
tokens.completion_tokens}"
|
||||
)
|
||||
elif isinstance(d, TTSUsageMetricsData):
|
||||
print(f"!!! MetricsFrame: {frame}, characters: {d.value}")
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_in_enabled=True, # This is so Silero VAD can get audio data
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True
|
||||
)
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
vad = SileroVAD()
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4-turbo-preview")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
fl_in = FrameLogger("Inner")
|
||||
fl_out = FrameLogger("Outer")
|
||||
ml = MetricsLogger()
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so it should not contain special characters. Respond to what the user said in a creative and helpful way.",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([fl_in, transport.input(), vad, tma_in, llm,
|
||||
fl_out, tts, tma_out, transport.output()])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
tma_in,
|
||||
llm,
|
||||
tts,
|
||||
ml,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@@ -80,8 +113,7 @@ async def main(room_url: str, token):
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -90,5 +122,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -11,18 +11,19 @@ import sys
|
||||
|
||||
from PIL import Image
|
||||
|
||||
from pipecat.frames.frames import ImageRawFrame, Frame, SystemFrame, TextFrame
|
||||
from pipecat.frames.frames import Frame, OutputImageRawFrame, SystemFrame, TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import (
|
||||
LLMAssistantContextAggregator,
|
||||
LLMUserContextAggregator,
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.transports.services.daily import DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from runner import configure
|
||||
@@ -30,6 +31,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -48,63 +50,88 @@ class ImageSyncAggregator(FrameProcessor):
|
||||
self._waiting_image_bytes = self._waiting_image.tobytes()
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
if not isinstance(frame, SystemFrame):
|
||||
await self.push_frame(ImageRawFrame(image=self._speaking_image_bytes, size=(1024, 1024), format=self._speaking_image_format))
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if not isinstance(frame, SystemFrame) and direction == FrameDirection.DOWNSTREAM:
|
||||
await self.push_frame(
|
||||
OutputImageRawFrame(
|
||||
image=self._speaking_image_bytes,
|
||||
size=(1024, 1024),
|
||||
format=self._speaking_image_format,
|
||||
)
|
||||
)
|
||||
await self.push_frame(frame)
|
||||
await self.push_frame(ImageRawFrame(image=self._waiting_image_bytes, size=(1024, 1024), format=self._waiting_image_format))
|
||||
await self.push_frame(
|
||||
OutputImageRawFrame(
|
||||
image=self._waiting_image_bytes,
|
||||
size=(1024, 1024),
|
||||
format=self._waiting_image_format,
|
||||
)
|
||||
)
|
||||
else:
|
||||
await self.push_frame(frame)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=1024,
|
||||
transcription_enabled=True
|
||||
)
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaHttpTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4-turbo-preview")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so it should not contain special characters. Respond to what the user said in a creative and helpful way.",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserContextAggregator(messages)
|
||||
tma_out = LLMAssistantContextAggregator(messages)
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
image_sync_aggregator = ImageSyncAggregator(
|
||||
os.path.join(os.path.dirname(__file__), "assets", "speaking.png"),
|
||||
os.path.join(os.path.dirname(__file__), "assets", "waiting.png"),
|
||||
)
|
||||
|
||||
pipeline = Pipeline([transport.input(), image_sync_aggregator,
|
||||
tma_in, llm, tma_out, tts, transport.output()])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
image_sync_aggregator,
|
||||
tma_in,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
participant_name = participant["info"]["userName"] or ''
|
||||
participant_name = participant["info"]["userName"] or ""
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([TextFrame(f"Hi, this is {participant_name}.")])
|
||||
await task.queue_frames([TextFrame(f"Hi there {participant_name}!")])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
@@ -112,5 +139,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -1,75 +1,104 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import logging
|
||||
import os
|
||||
from pipecat.pipeline.aggregators import (
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.services.ai_services import FrameLogger
|
||||
from pipecat.transports.daily_transport import DailyTransport
|
||||
from pipecat.services.open_ai_services import OpenAILLMService
|
||||
from pipecat.services.elevenlabs_ai_services import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s")
|
||||
logger = logging.getLogger("pipecat")
|
||||
logger.setLevel(logging.DEBUG)
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
duration_minutes=5,
|
||||
start_transcription=True,
|
||||
mic_enabled=True,
|
||||
mic_sample_rate=16000,
|
||||
camera_enabled=False,
|
||||
vad_enabled=True,
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4-turbo-preview")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
pipeline = Pipeline([FrameLogger(), llm, FrameLogger(), tts])
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
@transport.event_handler("on_first_other_participant_joined")
|
||||
async def on_first_other_participant_joined(transport, participant):
|
||||
await transport.say("Hi, I'm listening!", tts)
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
async def run_conversation():
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
await transport.run_interruptible_pipeline(
|
||||
pipeline,
|
||||
post_processor=LLMAssistantResponseAggregator(messages),
|
||||
pre_processor=LLMUserResponseAggregator(messages),
|
||||
)
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
await asyncio.gather(transport.run(), run_conversation())
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
100
examples/foundational/07a-interruptible-anthropic.py
Normal file
100
examples/foundational/07a-interruptible-anthropic.py
Normal file
@@ -0,0 +1,100 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = AnthropicLLMService(
|
||||
api_key=os.getenv("ANTHROPIC_API_KEY"), model="claude-3-opus-20240229"
|
||||
)
|
||||
|
||||
# todo: think more about how to handle system prompts in a more general way. OpenAI,
|
||||
# Google, and Anthropic all have slightly different approaches to providing a system
|
||||
# prompt.
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative, helpful, and brief way. Say hello.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
127
examples/foundational/07b-interruptible-langchain.py
Normal file
127
examples/foundational/07b-interruptible-langchain.py
Normal file
@@ -0,0 +1,127 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.frameworks.langchain import LangchainProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
|
||||
from langchain_community.chat_message_histories import ChatMessageHistory
|
||||
from langchain_core.chat_history import BaseChatMessageHistory
|
||||
from langchain_core.runnables.history import RunnableWithMessageHistory
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from runner import configure
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
message_store = {}
|
||||
|
||||
|
||||
def get_session_history(session_id: str) -> BaseChatMessageHistory:
|
||||
if session_id not in message_store:
|
||||
message_store[session_id] = ChatMessageHistory()
|
||||
return message_store[session_id]
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
prompt = ChatPromptTemplate.from_messages(
|
||||
[
|
||||
(
|
||||
"system",
|
||||
"Be nice and helpful. Answer very briefly and without special characters like `#` or `*`. "
|
||||
"Your response will be synthesized to voice and those characters will create unnatural sounds.",
|
||||
),
|
||||
MessagesPlaceholder("chat_history"),
|
||||
("human", "{input}"),
|
||||
]
|
||||
)
|
||||
chain = prompt | ChatOpenAI(model="gpt-4o", temperature=0.7)
|
||||
history_chain = RunnableWithMessageHistory(
|
||||
chain,
|
||||
get_session_history,
|
||||
history_messages_key="chat_history",
|
||||
input_messages_key="input",
|
||||
)
|
||||
lc = LangchainProcessor(history_chain)
|
||||
|
||||
tma_in = LLMUserResponseAggregator()
|
||||
tma_out = LLMAssistantResponseAggregator()
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
lc, # Langchain
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
lc.set_participant_id(participant["id"])
|
||||
# Kick off the conversation.
|
||||
# the `LLMMessagesFrame` will be picked up by the LangchainProcessor using
|
||||
# only the content of the last message to inject it in the prompt defined
|
||||
# above. So no role is required here.
|
||||
messages = [({"content": "Please briefly introduce yourself to the user."})]
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
94
examples/foundational/07c-interruptible-deepgram.py
Normal file
94
examples/foundational/07c-interruptible-deepgram.py
Normal file
@@ -0,0 +1,94 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True,
|
||||
),
|
||||
)
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY"), voice="aura-helios-en")
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
102
examples/foundational/07d-interruptible-elevenlabs.py
Normal file
102
examples/foundational/07d-interruptible-elevenlabs.py
Normal file
@@ -0,0 +1,102 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
98
examples/foundational/07e-interruptible-playht.py
Normal file
98
examples/foundational/07e-interruptible-playht.py
Normal file
@@ -0,0 +1,98 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.playht import PlayHTTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=16000,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = PlayHTTTSService(
|
||||
user_id=os.getenv("PLAYHT_USER_ID"),
|
||||
api_key=os.getenv("PLAYHT_API_KEY"),
|
||||
voice_url="s3://voice-cloning-zero-shot/801a663f-efd0-4254-98d0-5c175514c3e8/jennifer/manifest.json",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
107
examples/foundational/07f-interruptible-azure.py
Normal file
107
examples/foundational/07f-interruptible-azure.py
Normal file
@@ -0,0 +1,107 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.azure import AzureLLMService, AzureSTTService, AzureTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=16000,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True,
|
||||
),
|
||||
)
|
||||
|
||||
stt = AzureSTTService(
|
||||
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
|
||||
region=os.getenv("AZURE_SPEECH_REGION"),
|
||||
)
|
||||
|
||||
tts = AzureTTSService(
|
||||
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
|
||||
region=os.getenv("AZURE_SPEECH_REGION"),
|
||||
)
|
||||
|
||||
llm = AzureLLMService(
|
||||
api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
|
||||
endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
|
||||
model=os.getenv("AZURE_CHATGPT_MODEL"),
|
||||
)
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
94
examples/foundational/07g-interruptible-openai-tts.py
Normal file
94
examples/foundational/07g-interruptible-openai-tts.py
Normal file
@@ -0,0 +1,94 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.openai import OpenAITTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=24000,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = OpenAITTSService(api_key=os.getenv("OPENAI_API_KEY"), voice="alloy")
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
102
examples/foundational/07h-interruptible-openpipe.py
Normal file
102
examples/foundational/07h-interruptible-openpipe.py
Normal file
@@ -0,0 +1,102 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openpipe import OpenPipeLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
import time
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
timestamp = int(time.time())
|
||||
llm = OpenPipeLLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
openpipe_api_key=os.getenv("OPENPIPE_API_KEY"),
|
||||
model="gpt-4o",
|
||||
tags={"conversation_id": f"pipecat-{timestamp}"},
|
||||
)
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
99
examples/foundational/07i-interruptible-xtts.py
Normal file
99
examples/foundational/07i-interruptible-xtts.py
Normal file
@@ -0,0 +1,99 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.xtts import XTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = XTTSService(
|
||||
aiohttp_session=session,
|
||||
voice_id="Claribel Dervla",
|
||||
language="en",
|
||||
base_url="http://localhost:8000",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
102
examples/foundational/07j-interruptible-gladia.py
Normal file
102
examples/foundational/07j-interruptible-gladia.py
Normal file
@@ -0,0 +1,102 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.gladia import GladiaSTTService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True,
|
||||
),
|
||||
)
|
||||
|
||||
stt = GladiaSTTService(
|
||||
api_key=os.getenv("GLADIA_API_KEY"),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
94
examples/foundational/07k-interruptible-lmnt.py
Normal file
94
examples/foundational/07k-interruptible-lmnt.py
Normal file
@@ -0,0 +1,94 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.lmnt import LmntTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=24000,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = LmntTTSService(api_key=os.getenv("LMNT_API_KEY"), voice_id="morgan")
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
107
examples/foundational/07l-interruptible-together.py
Normal file
107
examples/foundational/07l-interruptible-together.py
Normal file
@@ -0,0 +1,107 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.together import TogetherLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = TogetherLLMService(
|
||||
api_key=os.getenv("TOGETHER_API_KEY"),
|
||||
model=os.getenv("TOGETHER_MODEL"),
|
||||
params=TogetherLLMService.InputParams(
|
||||
temperature=1.0,
|
||||
top_p=0.9,
|
||||
top_k=40,
|
||||
extra={
|
||||
"frequency_penalty": 2.0,
|
||||
"presence_penalty": 0.0,
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
102
examples/foundational/07m-interruptible-aws.py
Normal file
102
examples/foundational/07m-interruptible-aws.py
Normal file
@@ -0,0 +1,102 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.aws import AWSTTSService
|
||||
from pipecat.services.deepgram import DeepgramSTTService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=16000,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True,
|
||||
),
|
||||
)
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = AWSTTSService(
|
||||
api_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
|
||||
aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
|
||||
region=os.getenv("AWS_REGION"),
|
||||
voice_id="Amy",
|
||||
params=AWSTTSService.InputParams(engine="neural", language="en-GB", rate="1.05"),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
100
examples/foundational/07n-interruptible-google.py
Normal file
100
examples/foundational/07n-interruptible-google.py
Normal file
@@ -0,0 +1,100 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.deepgram import DeepgramSTTService
|
||||
from pipecat.services.google import GoogleTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=24000,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True,
|
||||
),
|
||||
)
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = GoogleTTSService(
|
||||
credentials=os.getenv("GOOGLE_CREDENTIALS"),
|
||||
voice_id="en-US-Neural2-J",
|
||||
params=GoogleTTSService.InputParams(language="en-US", rate="1.05"),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -3,18 +3,19 @@ import aiohttp
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from pipecat.pipeline.aggregators import SentenceAggregator
|
||||
from pipecat.processors.aggregators import SentenceAggregator
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
|
||||
from pipecat.transports.daily_transport import DailyTransport
|
||||
from pipecat.services.azure_ai_services import AzureLLMService, AzureTTSService
|
||||
from pipecat.services.elevenlabs_ai_services import ElevenLabsTTSService
|
||||
from pipecat.services.fal_ai_services import FalImageGenService
|
||||
from pipecat.pipeline.frames import AudioFrame, EndFrame, ImageFrame, LLMMessagesFrame, TextFrame
|
||||
from pipecat.transports.services.daily import DailyTransport
|
||||
from pipecat.services.azure import AzureLLMService, AzureTTSService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.frames.frames import AudioFrame, EndFrame, ImageFrame, LLMMessagesFrame, TextFrame
|
||||
|
||||
from runner import configure
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s")
|
||||
@@ -22,8 +23,10 @@ logger = logging.getLogger("pipecat")
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
|
||||
async def main(room_url: str):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
@@ -51,9 +54,7 @@ async def main(room_url: str):
|
||||
voice_id="jBpfuIE2acCO8z3wKNLl",
|
||||
)
|
||||
dalle = FalImageGenService(
|
||||
params=FalImageGenService.InputParams(
|
||||
image_size="1024x1024"
|
||||
),
|
||||
params=FalImageGenService.InputParams(image_size="1024x1024"),
|
||||
aiohttp_session=session,
|
||||
key=os.getenv("FAL_KEY"),
|
||||
)
|
||||
@@ -73,13 +74,11 @@ async def main(room_url: str):
|
||||
|
||||
async def get_text_and_audio(messages) -> Tuple[str, bytearray]:
|
||||
"""This function streams text from the LLM and uses the TTS service to convert
|
||||
that text to speech as it's received. """
|
||||
that text to speech as it's received."""
|
||||
source_queue = asyncio.Queue()
|
||||
sink_queue = asyncio.Queue()
|
||||
sentence_aggregator = SentenceAggregator()
|
||||
pipeline = Pipeline(
|
||||
[llm, sentence_aggregator, tts1], source_queue, sink_queue
|
||||
)
|
||||
pipeline = Pipeline([llm, sentence_aggregator, tts1], source_queue, sink_queue)
|
||||
|
||||
await source_queue.put(LLMMessagesFrame(messages))
|
||||
await source_queue.put(EndFrame())
|
||||
@@ -144,5 +143,4 @@ async def main(room_url: str):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -4,15 +4,21 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import AudioRawFrame, ImageRawFrame
|
||||
from pipecat.processors.filter import Filter
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
InputAudioRawFrame,
|
||||
InputImageRawFrame,
|
||||
OutputAudioRawFrame,
|
||||
OutputImageRawFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.transports.services.daily import DailyTransport, DailyParams
|
||||
|
||||
from runner import configure
|
||||
@@ -20,43 +26,63 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url, token):
|
||||
transport = DailyTransport(
|
||||
room_url, token, "Test",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720
|
||||
class MirrorProcessor(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, InputAudioRawFrame):
|
||||
await self.push_frame(
|
||||
OutputAudioRawFrame(
|
||||
audio=frame.audio,
|
||||
sample_rate=frame.sample_rate,
|
||||
num_channels=frame.num_channels,
|
||||
)
|
||||
)
|
||||
elif isinstance(frame, InputImageRawFrame):
|
||||
await self.push_frame(
|
||||
OutputImageRawFrame(image=frame.image, size=frame.size, format=frame.format)
|
||||
)
|
||||
else:
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Test",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_is_live=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720,
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_video(participant["id"])
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_video(participant["id"])
|
||||
|
||||
# The ParallelPipeline is not really necessary here but it shows how you
|
||||
# would process audio and video concurrently in parallel pipelines.
|
||||
pipeline = Pipeline([transport.input(),
|
||||
ParallelPipeline(
|
||||
[Filter([AudioRawFrame])],
|
||||
[Filter([ImageRawFrame])]),
|
||||
transport.output()])
|
||||
pipeline = Pipeline([transport.input(), MirrorProcessor(), transport.output()])
|
||||
|
||||
runner = PipelineRunner()
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
await runner.run(task)
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -4,14 +4,23 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
import tkinter as tk
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
InputAudioRawFrame,
|
||||
InputImageRawFrame,
|
||||
OutputAudioRawFrame,
|
||||
OutputImageRawFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.tk import TkLocalTransport
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
@@ -21,45 +30,73 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url, token):
|
||||
tk_root = tk.Tk()
|
||||
tk_root.title("Local Mirror")
|
||||
class MirrorProcessor(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
daily_transport = DailyTransport(room_url, token, "Test", DailyParams(audio_in_enabled=True))
|
||||
if isinstance(frame, InputAudioRawFrame):
|
||||
await self.push_frame(
|
||||
OutputAudioRawFrame(
|
||||
audio=frame.audio,
|
||||
sample_rate=frame.sample_rate,
|
||||
num_channels=frame.num_channels,
|
||||
)
|
||||
)
|
||||
elif isinstance(frame, InputImageRawFrame):
|
||||
await self.push_frame(
|
||||
OutputImageRawFrame(image=frame.image, size=frame.size, format=frame.format)
|
||||
)
|
||||
else:
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
tk_transport = TkLocalTransport(
|
||||
tk_root,
|
||||
TransportParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720))
|
||||
|
||||
@daily_transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_video(participant["id"])
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
pipeline = Pipeline([daily_transport.input(), tk_transport.output()])
|
||||
tk_root = tk.Tk()
|
||||
tk_root.title("Local Mirror")
|
||||
|
||||
runner = PipelineRunner()
|
||||
daily_transport = DailyTransport(
|
||||
room_url, token, "Test", DailyParams(audio_in_enabled=True)
|
||||
)
|
||||
|
||||
async def run_tk():
|
||||
while runner.is_active():
|
||||
tk_root.update()
|
||||
tk_root.update_idletasks()
|
||||
await asyncio.sleep(0.1)
|
||||
tk_transport = TkLocalTransport(
|
||||
tk_root,
|
||||
TransportParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_is_live=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720,
|
||||
),
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
@daily_transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_video(participant["id"])
|
||||
|
||||
await asyncio.gather(runner.run(task), run_tk())
|
||||
pipeline = Pipeline([daily_transport.input(), MirrorProcessor(), tk_transport.output()])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
async def run_tk():
|
||||
while not task.has_finished():
|
||||
tk_root.update()
|
||||
tk_root.update_idletasks()
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await asyncio.gather(runner.run(task), run_tk())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
96
examples/foundational/10-wake-phrase.py
Normal file
96
examples/foundational/10-wake-phrase.py
Normal file
@@ -0,0 +1,96 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.processors.filters.wake_check_filter import WakeCheckFilter
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Robot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful assistant. Respond to what the user said in a creative and helpful way. Keep your responses brief.",
|
||||
},
|
||||
]
|
||||
|
||||
hey_robot_filter = WakeCheckFilter(["hey robot", "hey, robot"])
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
hey_robot_filter, # Filter out speech not directed at the robot
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await tts.say("Hi! If you want to talk to me, just say 'Hey Robot'.")
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -1,181 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
|
||||
from PIL import Image
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
SystemFrame,
|
||||
TextFrame,
|
||||
ImageRawFrame,
|
||||
SpriteFrame,
|
||||
TranscriptionFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import (
|
||||
LLMUserContextAggregator,
|
||||
LLMAssistantContextAggregator,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
sprites = {}
|
||||
image_files = [
|
||||
"sc-default.png",
|
||||
"sc-talk.png",
|
||||
"sc-listen-1.png",
|
||||
"sc-think-1.png",
|
||||
"sc-think-2.png",
|
||||
"sc-think-3.png",
|
||||
"sc-think-4.png",
|
||||
]
|
||||
|
||||
script_dir = os.path.dirname(__file__)
|
||||
|
||||
for file in image_files:
|
||||
# Build the full path to the image file
|
||||
full_path = os.path.join(script_dir, "assets", file)
|
||||
# Get the filename without the extension to use as the dictionary key
|
||||
filename = os.path.splitext(os.path.basename(full_path))[0]
|
||||
# Open the image and convert it to bytes
|
||||
with Image.open(full_path) as img:
|
||||
sprites[file] = ImageRawFrame(image=img.tobytes(), size=img.size, format=img.format)
|
||||
|
||||
# When the bot isn't talking, show a static image of the cat listening
|
||||
quiet_frame = sprites["sc-listen-1.png"]
|
||||
|
||||
# When the bot is talking, build an animation from two sprites
|
||||
talking_list = [sprites["sc-default.png"], sprites["sc-talk.png"]]
|
||||
talking = [random.choice(talking_list) for x in range(30)]
|
||||
talking_frame = SpriteFrame(talking)
|
||||
|
||||
# TODO: Support "thinking" as soon as we get a valid transcript, while LLM
|
||||
# is processing
|
||||
thinking_list = [
|
||||
sprites["sc-think-1.png"],
|
||||
sprites["sc-think-2.png"],
|
||||
sprites["sc-think-3.png"],
|
||||
sprites["sc-think-4.png"],
|
||||
]
|
||||
thinking_frame = SpriteFrame(thinking_list)
|
||||
|
||||
|
||||
class NameCheckFilter(FrameProcessor):
|
||||
def __init__(self, names: list[str]):
|
||||
super().__init__()
|
||||
self._names = names
|
||||
self._sentence = ""
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
if isinstance(frame, SystemFrame):
|
||||
await self.push_frame(frame, direction)
|
||||
return
|
||||
|
||||
content: str = ""
|
||||
|
||||
# TODO: split up transcription by participant
|
||||
if isinstance(frame, TranscriptionFrame):
|
||||
content = frame.text
|
||||
self._sentence += content
|
||||
if self._sentence.endswith((".", "?", "!")):
|
||||
if any(name in self._sentence for name in self._names):
|
||||
await self.push_frame(TextFrame(self._sentence))
|
||||
self._sentence = ""
|
||||
else:
|
||||
self._sentence = ""
|
||||
else:
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
class ImageSyncAggregator(FrameProcessor):
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await self.push_frame(talking_frame)
|
||||
await self.push_frame(frame)
|
||||
await self.push_frame(quiet_frame)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Santa Cat",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=720,
|
||||
camera_out_height=1280,
|
||||
camera_out_framerate=10,
|
||||
transcription_enabled=True
|
||||
)
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4-turbo-preview")
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id="jBpfuIE2acCO8z3wKNLl",
|
||||
)
|
||||
isa = ImageSyncAggregator()
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are Santa Cat, a cat that lives in Santa's workshop at the North Pole. You should be clever, and a bit sarcastic. You should also tell jokes every once in a while. Your responses should only be a few sentences long.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserContextAggregator(messages)
|
||||
tma_out = LLMAssistantContextAggregator(messages)
|
||||
ncf = NameCheckFilter(["Santa Cat", "Santa"])
|
||||
|
||||
pipeline = Pipeline([transport.input(), isa, ncf, tma_in,
|
||||
llm, tma_out, tts, transport.output()])
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
# Send some greeting at the beginning.
|
||||
await tts.say("Hi! If you want to talk to me, just say 'hey Santa Cat'.")
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
|
||||
async def starting_image():
|
||||
await transport.send_image(quiet_frame)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
await asyncio.gather(runner.run(task), starting_image())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
@@ -12,28 +12,30 @@ import wave
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
AudioRawFrame,
|
||||
LLMResponseEndFrame,
|
||||
LLMFullResponseEndFrame,
|
||||
LLMMessagesFrame,
|
||||
OutputAudioRawFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import (
|
||||
LLMUserContextAggregator,
|
||||
LLMAssistantContextAggregator,
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMUserResponseAggregator,
|
||||
LLMAssistantResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.logger import FrameLogger
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -52,14 +54,16 @@ for file in sound_files:
|
||||
filename = os.path.splitext(os.path.basename(full_path))[0]
|
||||
# Open the image and convert it to bytes
|
||||
with wave.open(full_path) as audio_file:
|
||||
sounds[file] = AudioRawFrame(audio_file.readframes(-1),
|
||||
audio_file.getframerate(), audio_file.getnchannels())
|
||||
sounds[file] = OutputAudioRawFrame(
|
||||
audio_file.readframes(-1), audio_file.getframerate(), audio_file.getnchannels()
|
||||
)
|
||||
|
||||
|
||||
class OutboundSoundEffectWrapper(FrameProcessor):
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
if isinstance(frame, LLMResponseEndFrame):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, LLMFullResponseEndFrame):
|
||||
await self.push_frame(sounds["ding1.wav"])
|
||||
# In case anything else downstream needs it
|
||||
await self.push_frame(frame, direction)
|
||||
@@ -68,8 +72,9 @@ class OutboundSoundEffectWrapper(FrameProcessor):
|
||||
|
||||
|
||||
class InboundSoundEffectWrapper(FrameProcessor):
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, LLMMessagesFrame):
|
||||
await self.push_frame(sounds["ding2.wav"])
|
||||
# In case anything else downstream needs it
|
||||
@@ -78,23 +83,27 @@ class InboundSoundEffectWrapper(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(audio_out_enabled=True, transcription_enabled=True)
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4-turbo-preview")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id="ErXwobaYiN019PkySvjV",
|
||||
tts = CartesiaHttpTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
messages = [
|
||||
@@ -104,15 +113,27 @@ async def main(room_url: str, token):
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserContextAggregator(messages)
|
||||
tma_out = LLMAssistantContextAggregator(messages)
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
out_sound = OutboundSoundEffectWrapper()
|
||||
in_sound = InboundSoundEffectWrapper()
|
||||
fl = FrameLogger("LLM Out")
|
||||
fl2 = FrameLogger("Transcription In")
|
||||
|
||||
pipeline = Pipeline([transport.input(), tma_in, in_sound, fl2, llm,
|
||||
tma_out, fl, tts, out_sound, transport.output()])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
tma_in,
|
||||
in_sound,
|
||||
fl2,
|
||||
llm,
|
||||
fl,
|
||||
tts,
|
||||
out_sound,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
]
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
@@ -128,5 +149,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -16,16 +16,17 @@ from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.moondream import MoondreamService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVAD
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -33,7 +34,6 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class UserImageRequester(FrameProcessor):
|
||||
|
||||
def __init__(self, participant_id: str | None = None):
|
||||
super().__init__()
|
||||
self._participant_id = participant_id
|
||||
@@ -42,30 +42,29 @@ class UserImageRequester(FrameProcessor):
|
||||
self._participant_id = participant_id
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if self._participant_id and isinstance(frame, TextFrame):
|
||||
await self.push_frame(UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM)
|
||||
await self.push_frame(
|
||||
UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
|
||||
)
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Describe participant video",
|
||||
DailyParams(
|
||||
audio_in_enabled=True, # This is so Silero VAD can get audio data
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True
|
||||
)
|
||||
)
|
||||
|
||||
vad = SileroVAD()
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
user_response = UserResponseAggregator()
|
||||
@@ -77,10 +76,9 @@ async def main(room_url: str, token):
|
||||
# If you run into weird description, try with use_cpu=True
|
||||
moondream = MoondreamService()
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
@@ -90,8 +88,17 @@ async def main(room_url: str, token):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
image_requester.set_participant_id(participant["id"])
|
||||
|
||||
pipeline = Pipeline([transport.input(), vad, user_response, image_requester,
|
||||
vision_aggregator, moondream, tts, transport.output()])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
user_response,
|
||||
image_requester,
|
||||
vision_aggregator,
|
||||
moondream,
|
||||
tts,
|
||||
transport.output(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@@ -99,6 +106,6 @@ async def main(room_url: str, token):
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
113
examples/foundational/12a-describe-video-gemini-flash.py
Normal file
113
examples/foundational/12a-describe-video-gemini-flash.py
Normal file
@@ -0,0 +1,113 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.google import GoogleLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class UserImageRequester(FrameProcessor):
|
||||
def __init__(self, participant_id: str | None = None):
|
||||
super().__init__()
|
||||
self._participant_id = participant_id
|
||||
|
||||
def set_participant_id(self, participant_id: str):
|
||||
self._participant_id = participant_id
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if self._participant_id and isinstance(frame, TextFrame):
|
||||
await self.push_frame(
|
||||
UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
|
||||
)
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Describe participant video",
|
||||
DailyParams(
|
||||
audio_in_enabled=True, # This is so Silero VAD can get audio data
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
user_response = UserResponseAggregator()
|
||||
|
||||
image_requester = UserImageRequester()
|
||||
|
||||
vision_aggregator = VisionImageFrameAggregator()
|
||||
|
||||
google = GoogleLLMService(
|
||||
model="gemini-1.5-flash-latest", api_key=os.getenv("GOOGLE_API_KEY")
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await tts.say("Hi there! Feel free to ask me what I see.")
|
||||
transport.capture_participant_video(participant["id"], framerate=0)
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
image_requester.set_participant_id(participant["id"])
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
user_response,
|
||||
image_requester,
|
||||
vision_aggregator,
|
||||
google,
|
||||
tts,
|
||||
transport.output(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
110
examples/foundational/12b-describe-video-gpt-4o.py
Normal file
110
examples/foundational/12b-describe-video-gpt-4o.py
Normal file
@@ -0,0 +1,110 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class UserImageRequester(FrameProcessor):
|
||||
def __init__(self, participant_id: str | None = None):
|
||||
super().__init__()
|
||||
self._participant_id = participant_id
|
||||
|
||||
def set_participant_id(self, participant_id: str):
|
||||
self._participant_id = participant_id
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if self._participant_id and isinstance(frame, TextFrame):
|
||||
await self.push_frame(
|
||||
UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
|
||||
)
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Describe participant video",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
user_response = UserResponseAggregator()
|
||||
|
||||
image_requester = UserImageRequester()
|
||||
|
||||
vision_aggregator = VisionImageFrameAggregator()
|
||||
|
||||
openai = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await tts.say("Hi there! Feel free to ask me what I see.")
|
||||
transport.capture_participant_video(participant["id"], framerate=0)
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
image_requester.set_participant_id(participant["id"])
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
user_response,
|
||||
image_requester,
|
||||
vision_aggregator,
|
||||
openai,
|
||||
tts,
|
||||
transport.output(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
113
examples/foundational/12c-describe-video-anthropic.py
Normal file
113
examples/foundational/12c-describe-video-anthropic.py
Normal file
@@ -0,0 +1,113 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class UserImageRequester(FrameProcessor):
|
||||
def __init__(self, participant_id: str | None = None):
|
||||
super().__init__()
|
||||
self._participant_id = participant_id
|
||||
|
||||
def set_participant_id(self, participant_id: str):
|
||||
self._participant_id = participant_id
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if self._participant_id and isinstance(frame, TextFrame):
|
||||
await self.push_frame(
|
||||
UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
|
||||
)
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Describe participant video",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
user_response = UserResponseAggregator()
|
||||
|
||||
image_requester = UserImageRequester()
|
||||
|
||||
vision_aggregator = VisionImageFrameAggregator()
|
||||
|
||||
anthropic = AnthropicLLMService(api_key=os.getenv("ANTHROPIC_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
params=CartesiaTTSService.InputParams(
|
||||
sample_rate=16000,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await tts.say("Hi there! Feel free to ask me what I see.")
|
||||
transport.capture_participant_video(participant["id"], framerate=0)
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
image_requester.set_participant_id(participant["id"])
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
user_response,
|
||||
image_requester,
|
||||
vision_aggregator,
|
||||
anthropic,
|
||||
tts,
|
||||
transport.output(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -4,6 +4,7 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
@@ -20,6 +21,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -27,29 +29,33 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class TranscriptionLogger(FrameProcessor):
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, TranscriptionFrame):
|
||||
print(f"Transcription: {frame.text}")
|
||||
|
||||
|
||||
async def main(room_url: str):
|
||||
transport = DailyTransport(room_url, None, "Transcription bot",
|
||||
DailyParams(audio_in_enabled=True))
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
stt = WhisperSTTService()
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Transcription bot", DailyParams(audio_in_enabled=True)
|
||||
)
|
||||
|
||||
tl = TranscriptionLogger()
|
||||
stt = WhisperSTTService()
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
tl = TranscriptionLogger()
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
runner = PipelineRunner()
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
await runner.run(task)
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -16,11 +16,10 @@ from pipecat.services.whisper import WhisperSTTService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.audio import LocalAudioTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -28,13 +27,14 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class TranscriptionLogger(FrameProcessor):
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, TranscriptionFrame):
|
||||
print(f"Transcription: {frame.text}")
|
||||
|
||||
|
||||
async def main(room_url: str):
|
||||
async def main():
|
||||
transport = LocalAudioTransport(TransportParams(audio_in_enabled=True))
|
||||
|
||||
stt = WhisperSTTService()
|
||||
@@ -51,5 +51,4 @@ async def main(room_url: str):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
62
examples/foundational/13b-deepgram-transcription.py
Normal file
62
examples/foundational/13b-deepgram-transcription.py
Normal file
@@ -0,0 +1,62 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import Frame, TranscriptionFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.deepgram import DeepgramSTTService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class TranscriptionLogger(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, TranscriptionFrame):
|
||||
print(f"Transcription: {frame.text}")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Transcription bot", DailyParams(audio_in_enabled=True)
|
||||
)
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tl = TranscriptionLogger()
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
136
examples/foundational/14-function-calling.py
Normal file
136
examples/foundational/14-function-calling.py
Normal file
@@ -0,0 +1,136 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.logger import FrameLogger
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMContext, OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def start_fetch_weather(function_name, llm, context):
|
||||
# note: we can't push a frame to the LLM here. the bot
|
||||
# can interrupt itself and/or cause audio overlapping glitches.
|
||||
# possible question for Aleix and Chad about what the right way
|
||||
# to trigger speech is, now, with the new queues/async/sync refactors.
|
||||
await llm.push_frame(TextFrame("Let me check on that. "))
|
||||
logger.debug(f"Starting fetch_weather_from_api with function_name: {function_name}")
|
||||
|
||||
|
||||
async def fetch_weather_from_api(function_name, tool_call_id, args, llm, context, result_callback):
|
||||
await result_callback({"conditions": "nice", "temperature": "75"})
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
# Register a function_name of None to get all functions
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
||||
|
||||
fl_in = FrameLogger("Inner")
|
||||
fl_out = FrameLogger("Outer")
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the users location.",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
},
|
||||
)
|
||||
]
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
# fl_in,
|
||||
transport.input(),
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
# fl_out,
|
||||
tts,
|
||||
transport.output(),
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
await tts.say("Hi! Ask me about the weather in San Francisco.")
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
160
examples/foundational/15-switch-voices.py
Normal file
160
examples/foundational/15-switch-voices.py
Normal file
@@ -0,0 +1,160 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.filters.function_filter import FunctionFilter
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
current_voice = "News Lady"
|
||||
|
||||
|
||||
async def switch_voice(function_name, tool_call_id, args, llm, context, result_callback):
|
||||
global current_voice
|
||||
current_voice = args["voice"]
|
||||
await result_callback(
|
||||
{
|
||||
"voice": f"You are now using your {current_voice} voice. Your responses should now be as if you were a {current_voice}."
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
async def news_lady_filter(frame) -> bool:
|
||||
return current_voice == "News Lady"
|
||||
|
||||
|
||||
async def british_lady_filter(frame) -> bool:
|
||||
return current_voice == "British Lady"
|
||||
|
||||
|
||||
async def barbershop_man_filter(frame) -> bool:
|
||||
return current_voice == "Barbershop Man"
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Pipecat",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
news_lady = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="bf991597-6c13-47e4-8411-91ec2de5c466", # Newslady
|
||||
)
|
||||
|
||||
british_lady = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
barbershop_man = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="a0e99841-438c-4a64-b679-ae501e7d6091", # Barbershop Man
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
llm.register_function("switch_voice", switch_voice)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "switch_voice",
|
||||
"description": "Switch your voice only when the user asks you to",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"voice": {
|
||||
"type": "string",
|
||||
"description": "The voice the user wants you to use",
|
||||
},
|
||||
},
|
||||
"required": ["voice"],
|
||||
},
|
||||
},
|
||||
)
|
||||
]
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities. Respond to what the user said in a creative and helpful way. Your output should not include non-alphanumeric characters. You can do the following voices: 'News Lady', 'British Lady' and 'Barbershop Man'.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
ParallelPipeline( # TTS (one of the following vocies)
|
||||
[FunctionFilter(news_lady_filter), news_lady], # News Lady voice
|
||||
[FunctionFilter(british_lady_filter), british_lady], # British Lady voice
|
||||
[FunctionFilter(barbershop_man_filter), barbershop_man], # Barbershop Man voice
|
||||
),
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{
|
||||
"role": "system",
|
||||
"content": f"Please introduce yourself to the user and let them know the voices you can do. Your initial responses should be as if you were a {current_voice}.",
|
||||
}
|
||||
)
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
151
examples/foundational/15a-switch-languages.py
Normal file
151
examples/foundational/15a-switch-languages.py
Normal file
@@ -0,0 +1,151 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.filters.function_filter import FunctionFilter
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.whisper import Model, WhisperSTTService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
current_language = "English"
|
||||
|
||||
|
||||
async def switch_language(function_name, tool_call_id, args, llm, context, result_callback):
|
||||
global current_language
|
||||
current_language = args["language"]
|
||||
await result_callback({"voice": f"Your answers from now on should be in {current_language}."})
|
||||
|
||||
|
||||
async def english_filter(frame) -> bool:
|
||||
return current_language == "English"
|
||||
|
||||
|
||||
async def spanish_filter(frame) -> bool:
|
||||
return current_language == "Spanish"
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Pipecat",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True,
|
||||
),
|
||||
)
|
||||
|
||||
stt = WhisperSTTService(model=Model.LARGE)
|
||||
|
||||
english_tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
spanish_tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="846d6cb0-2301-48b6-9683-48f5618ea2f6", # Spanish-speaking Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
llm.register_function("switch_language", switch_language)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "switch_language",
|
||||
"description": "Switch to another language when the user asks you to",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"language": {
|
||||
"type": "string",
|
||||
"description": "The language the user wants you to speak",
|
||||
},
|
||||
},
|
||||
"required": ["language"],
|
||||
},
|
||||
},
|
||||
)
|
||||
]
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities. Respond to what the user said in a creative and helpful way. Your output should not include non-alphanumeric characters. You can speak the following languages: 'English' and 'Spanish'.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
ParallelPipeline( # TTS (bot will speak the chosen language)
|
||||
[FunctionFilter(english_filter), english_tts], # English
|
||||
[FunctionFilter(spanish_filter), spanish_tts], # Spanish
|
||||
),
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{
|
||||
"role": "system",
|
||||
"content": f"Please introduce yourself to the user and let them know the languages you speak. Your initial responses should be in {current_language}.",
|
||||
}
|
||||
)
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
142
examples/foundational/16-gpu-container-local-bot.py
Normal file
142
examples/foundational/16-gpu-container-local-bot.py
Normal file
@@ -0,0 +1,142 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.deepgram import DeepgramTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import (
|
||||
DailyParams,
|
||||
DailyTransport,
|
||||
DailyTransportMessageFrame,
|
||||
)
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = DeepgramTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("DEEPGRAM_API_KEY"),
|
||||
voice="aura-asteria-en",
|
||||
base_url="http://0.0.0.0:8080/v1/speak",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
# To use OpenAI
|
||||
# api_key=os.getenv("OPENAI_API_KEY"),
|
||||
# model="gpt-4o"
|
||||
# Or, to use a local vLLM (or similar) api server
|
||||
model="meta-llama/Meta-Llama-3-8B-Instruct",
|
||||
base_url="http://0.0.0.0:8000/v1",
|
||||
)
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))
|
||||
|
||||
# When a participant joins, start transcription for that participant so the
|
||||
# bot can "hear" and respond to them.
|
||||
@transport.event_handler("on_participant_joined")
|
||||
async def on_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
|
||||
# When the first participant joins, the bot should introduce itself.
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
# Handle "latency-ping" messages. The client will send app messages that look like
|
||||
# this:
|
||||
# { "latency-ping": { ts: <client-side timestamp> }}
|
||||
#
|
||||
# We want to send an immediate pong back to the client from this handler function.
|
||||
# Also, we will push a frame into the top of the pipeline and send it after the
|
||||
#
|
||||
@transport.event_handler("on_app_message")
|
||||
async def on_app_message(transport, message, sender):
|
||||
try:
|
||||
if "latency-ping" in message:
|
||||
logger.debug(f"Received latency ping app message: {message}")
|
||||
ts = message["latency-ping"]["ts"]
|
||||
# Send immediately
|
||||
transport.output().send_message(
|
||||
DailyTransportMessageFrame(
|
||||
message={"latency-pong-msg-handler": {"ts": ts}}, participant_id=sender
|
||||
)
|
||||
)
|
||||
# And push to the pipeline for the Daily transport.output to send
|
||||
await tma_in.push_frame(
|
||||
DailyTransportMessageFrame(
|
||||
message={"latency-pong-pipeline-delivery": {"ts": ts}},
|
||||
participant_id=sender,
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug(f"message handling error: {e} - {message}")
|
||||
|
||||
runner = PipelineRunner()
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
116
examples/foundational/17-detect-user-idle.py
Normal file
116
examples/foundational/17-detect-user-idle.py
Normal file
@@ -0,0 +1,116 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.user_idle_processor import UserIdleProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
async def user_idle_callback(user_idle: UserIdleProcessor):
|
||||
messages.append(
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Ask the user if they are still there and try to prompt for some input, but be short.",
|
||||
}
|
||||
)
|
||||
await user_idle.push_frame(LLMMessagesFrame(messages))
|
||||
|
||||
user_idle = UserIdleProcessor(callback=user_idle_callback, timeout=5.0)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
user_idle, # Idle user check-in
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
76
examples/foundational/18-gstreamer-filesrc.py
Normal file
76
examples/foundational/18-gstreamer-filesrc.py
Normal file
@@ -0,0 +1,76 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.gstreamer.pipeline_source import GStreamerPipelineSource
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure_with_args
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
parser.add_argument("-i", "--input", type=str, required=True, help="Input video file")
|
||||
|
||||
(room_url, _, args) = await configure_with_args(session, parser)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
"GStreamer",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_is_live=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720,
|
||||
camera_out_is_live=True,
|
||||
),
|
||||
)
|
||||
|
||||
gst = GStreamerPipelineSource(
|
||||
pipeline=f"filesrc location={args.input}",
|
||||
out_params=GStreamerPipelineSource.OutputParams(
|
||||
video_width=1280,
|
||||
video_height=720,
|
||||
audio_sample_rate=16000,
|
||||
audio_channels=1,
|
||||
),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
gst, # GStreamer file source
|
||||
transport.output(), # Transport bot output
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
67
examples/foundational/18a-gstreamer-videotestsrc.py
Normal file
67
examples/foundational/18a-gstreamer-videotestsrc.py
Normal file
@@ -0,0 +1,67 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import sys
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.gstreamer.pipeline_source import GStreamerPipelineSource
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
"GStreamer",
|
||||
DailyParams(
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720,
|
||||
camera_out_is_live=True,
|
||||
),
|
||||
)
|
||||
|
||||
gst = GStreamerPipelineSource(
|
||||
pipeline='videotestsrc ! capsfilter caps="video/x-raw,width=1280,height=720,framerate=30/1"',
|
||||
out_params=GStreamerPipelineSource.OutputParams(
|
||||
video_width=1280, video_height=720, clock_sync=False
|
||||
),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
gst, # GStreamer file source
|
||||
transport.output(), # Transport bot output
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
118
examples/foundational/19a-tools-anthropic.py
Normal file
118
examples/foundational/19a-tools-anthropic.py
Normal file
@@ -0,0 +1,118 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def get_weather(function_name, tool_call_id, arguments, llm, context, result_callback):
|
||||
location = arguments["location"]
|
||||
await result_callback(f"The weather in {location} is currently 72 degrees and sunny.")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = AnthropicLLMService(
|
||||
api_key=os.getenv("ANTHROPIC_API_KEY"), model="claude-3-5-sonnet-20240620"
|
||||
)
|
||||
llm.register_function("get_weather", get_weather)
|
||||
|
||||
tools = [
|
||||
{
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather in a given location",
|
||||
"input_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
}
|
||||
},
|
||||
"required": ["location"],
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
# todo: test with very short initial user message
|
||||
|
||||
# messages = [{"role": "system",
|
||||
# "content": "You are a helpful assistant who can report the weather in any location in the universe. Respond concisely. Your response will be turned into speech so use only simple words and punctuation."},
|
||||
# {"role": "user",
|
||||
# "content": " Start the conversation by introducing yourself."}]
|
||||
|
||||
messages = [{"role": "user", "content": "Say 'hello' to start the conversation."}]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
context_aggregator.user(), # User spoken responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses and tool context
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
173
examples/foundational/19b-tools-video-anthropic.py
Normal file
173
examples/foundational/19b-tools-video-anthropic.py
Normal file
@@ -0,0 +1,173 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
video_participant_id = None
|
||||
|
||||
|
||||
async def get_weather(function_name, tool_call_id, arguments, llm, context, result_callback):
|
||||
location = arguments["location"]
|
||||
await result_callback(f"The weather in {location} is currently 72 degrees and sunny.")
|
||||
|
||||
|
||||
async def get_image(function_name, tool_call_id, arguments, llm, context, result_callback):
|
||||
question = arguments["question"]
|
||||
await llm.request_image_frame(user_id=video_participant_id, text_content=question)
|
||||
|
||||
|
||||
async def main():
|
||||
global llm
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = AnthropicLLMService(
|
||||
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
||||
model="claude-3-5-sonnet-20240620",
|
||||
enable_prompt_caching_beta=True,
|
||||
)
|
||||
llm.register_function("get_weather", get_weather)
|
||||
llm.register_function("get_image", get_image)
|
||||
|
||||
tools = [
|
||||
{
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather in a given location",
|
||||
"input_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
}
|
||||
},
|
||||
"required": ["location"],
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "get_image",
|
||||
"description": "Get an image from the video stream.",
|
||||
"input_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"question": {
|
||||
"type": "string",
|
||||
"description": "The question that the user is asking about the image.",
|
||||
}
|
||||
},
|
||||
"required": ["question"],
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
# todo: test with very short initial user message
|
||||
|
||||
system_prompt = """\
|
||||
You are a helpful assistant who converses with a user and answers questions. Respond concisely to general questions.
|
||||
|
||||
Your response will be turned into speech so use only simple words and punctuation.
|
||||
|
||||
You have access to two tools: get_weather and get_image.
|
||||
|
||||
You can respond to questions about the weather using the get_weather tool.
|
||||
|
||||
You can answer questions about the user's video stream using the get_image tool. Some examples of phrases that \
|
||||
indicate you should use the get_image tool are:
|
||||
- What do you see?
|
||||
- What's in the video?
|
||||
- Can you describe the video?
|
||||
- Tell me about what you see.
|
||||
- Tell me something interesting about what you see.
|
||||
- What's happening in the video?
|
||||
|
||||
If you need to use a tool, simply use the tool. Do not tell the user the tool you are using. Be brief and concise.
|
||||
"""
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": system_prompt,
|
||||
}
|
||||
],
|
||||
},
|
||||
{"role": "user", "content": "Start the conversation by introducing yourself."},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
context_aggregator.user(), # User speech to text
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses and tool context
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
global video_participant_id
|
||||
video_participant_id = participant["id"]
|
||||
transport.capture_participant_transcription(video_participant_id)
|
||||
transport.capture_participant_video(video_participant_id, framerate=0)
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
runner = PipelineRunner()
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
137
examples/foundational/19c-tools-togetherai.py
Normal file
137
examples/foundational/19c-tools-togetherai.py
Normal file
@@ -0,0 +1,137 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.together import TogetherLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def get_current_weather(
|
||||
function_name, tool_call_id, arguments, llm, context, result_callback
|
||||
):
|
||||
logger.debug("IN get_current_weather")
|
||||
location = arguments["location"]
|
||||
await result_callback(f"The weather in {location} is currently 72 degrees and sunny.")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = TogetherLLMService(
|
||||
api_key=os.getenv("TOGETHER_API_KEY"),
|
||||
model=os.getenv("TOGETHER_MODEL"),
|
||||
)
|
||||
llm.register_function("get_current_weather", get_current_weather)
|
||||
|
||||
weatherTool = {
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather in a given location",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
},
|
||||
"required": ["location"],
|
||||
},
|
||||
}
|
||||
|
||||
system_prompt = f"""\
|
||||
You have access to the following functions:
|
||||
|
||||
Use the function '{weatherTool["name"]}' to '{weatherTool["description"]}':
|
||||
{json.dumps(weatherTool)}
|
||||
|
||||
If you choose to call a function ONLY reply in the following format with no prefix or suffix:
|
||||
|
||||
<function=example_function_name>{{\"example_name\": \"example_value\"}}</function>
|
||||
|
||||
Reminder:
|
||||
- Function calls MUST follow the specified format, start with <function= and end with </function>
|
||||
- Required parameters MUST be specified
|
||||
- Only call one function at a time
|
||||
- Put the entire function call reply on one line
|
||||
- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls
|
||||
|
||||
"""
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": "Wait for the user to say something."},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
context_aggregator.user(), # User speech to text
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses and tool context
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -1,18 +1,29 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
|
||||
|
||||
|
||||
def configure():
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
async def configure(aiohttp_session: aiohttp.ClientSession):
|
||||
(url, token, _) = await configure_with_args(aiohttp_session)
|
||||
return (url, token)
|
||||
|
||||
|
||||
async def configure_with_args(
|
||||
aiohttp_session: aiohttp.ClientSession, parser: argparse.ArgumentParser | None = None
|
||||
):
|
||||
if not parser:
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
parser.add_argument(
|
||||
"-u",
|
||||
"--url",
|
||||
type=str,
|
||||
required=False,
|
||||
help="URL of the Daily room to join")
|
||||
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-k",
|
||||
"--apikey",
|
||||
@@ -28,31 +39,24 @@ def configure():
|
||||
|
||||
if not url:
|
||||
raise Exception(
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL.")
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
|
||||
)
|
||||
|
||||
if not key:
|
||||
raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
raise Exception(
|
||||
"No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
|
||||
)
|
||||
|
||||
daily_rest_helper = DailyRESTHelper(
|
||||
daily_api_key=key,
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
|
||||
# Create a meeting token for the given room with an expiration 1 hour in
|
||||
# the future.
|
||||
room_name: str = urllib.parse.urlparse(url).path[1:]
|
||||
expiration: float = time.time() + 60 * 60
|
||||
expiry_time: float = 60 * 60
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://api.daily.co/v1/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True,
|
||||
"exp": expiration}},
|
||||
)
|
||||
token = await daily_rest_helper.get_token(url, expiry_time)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
|
||||
return (url, token)
|
||||
return (url, token, args)
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
syntax = "proto3";
|
||||
|
||||
package pipecat_proto;
|
||||
|
||||
message TextFrame {
|
||||
string text = 1;
|
||||
}
|
||||
|
||||
message AudioFrame {
|
||||
bytes audio = 1;
|
||||
}
|
||||
|
||||
message TranscriptionFrame {
|
||||
string text = 1;
|
||||
string participant_id = 2;
|
||||
string timestamp = 3;
|
||||
}
|
||||
|
||||
message Frame {
|
||||
oneof frame {
|
||||
TextFrame text = 1;
|
||||
AudioFrame audio = 2;
|
||||
TranscriptionFrame transcription = 3;
|
||||
}
|
||||
}
|
||||
@@ -1,134 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<script src="//cdn.jsdelivr.net/npm/protobufjs@7.X.X/dist/protobuf.min.js"></script>
|
||||
<title>WebSocket Audio Stream</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1>WebSocket Audio Stream</h1>
|
||||
<button id="startAudioBtn">Start Audio</button>
|
||||
<button id="stopAudioBtn">Stop Audio</button>
|
||||
<script>
|
||||
const SAMPLE_RATE = 16000;
|
||||
const BUFFER_SIZE = 8192;
|
||||
const MIN_AUDIO_SIZE = 6400;
|
||||
|
||||
let audioContext;
|
||||
let microphoneStream;
|
||||
let scriptProcessor;
|
||||
let source;
|
||||
let frame;
|
||||
let audioChunks = [];
|
||||
let isPlaying = false;
|
||||
let ws;
|
||||
|
||||
const proto = protobuf.load("frames.proto", (err, root) => {
|
||||
if (err) throw err;
|
||||
frame = root.lookupType("pipecat_proto.Frame");
|
||||
});
|
||||
|
||||
function initWebSocket() {
|
||||
ws = new WebSocket('ws://localhost:8765');
|
||||
|
||||
ws.addEventListener('open', () => console.log('WebSocket connection established.'));
|
||||
ws.addEventListener('message', handleWebSocketMessage);
|
||||
ws.addEventListener('close', (event) => console.log("WebSocket connection closed.", event.code, event.reason));
|
||||
ws.addEventListener('error', (event) => console.error('WebSocket error:', event));
|
||||
}
|
||||
|
||||
async function handleWebSocketMessage(event) {
|
||||
const arrayBuffer = await event.data.arrayBuffer();
|
||||
enqueueAudioFromProto(arrayBuffer);
|
||||
}
|
||||
|
||||
function enqueueAudioFromProto(arrayBuffer) {
|
||||
const parsedFrame = frame.decode(new Uint8Array(arrayBuffer));
|
||||
if (!parsedFrame?.audio) return false;
|
||||
|
||||
const frameCount = parsedFrame.audio.data.length / 2;
|
||||
const audioOutBuffer = audioContext.createBuffer(1, frameCount, SAMPLE_RATE);
|
||||
const nowBuffering = audioOutBuffer.getChannelData(0);
|
||||
const view = new Int16Array(parsedFrame.audio.data.buffer);
|
||||
|
||||
for (let i = 0; i < frameCount; i++) {
|
||||
const word = view[i];
|
||||
nowBuffering[i] = ((word + 32768) % 65536 - 32768) / 32768.0;
|
||||
}
|
||||
|
||||
audioChunks.push(audioOutBuffer);
|
||||
if (!isPlaying) playNextChunk();
|
||||
}
|
||||
|
||||
function playNextChunk() {
|
||||
if (audioChunks.length === 0) {
|
||||
isPlaying = false;
|
||||
return;
|
||||
}
|
||||
|
||||
isPlaying = true;
|
||||
const audioOutBuffer = audioChunks.shift();
|
||||
const source = audioContext.createBufferSource();
|
||||
source.buffer = audioOutBuffer;
|
||||
source.connect(audioContext.destination);
|
||||
source.onended = playNextChunk;
|
||||
source.start();
|
||||
}
|
||||
|
||||
function startAudio() {
|
||||
if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
|
||||
alert('getUserMedia is not supported in your browser.');
|
||||
return;
|
||||
}
|
||||
|
||||
navigator.mediaDevices.getUserMedia({ audio: true })
|
||||
.then((stream) => {
|
||||
microphoneStream = stream;
|
||||
audioContext = new (window.AudioContext || window.webkitAudioContext)();
|
||||
scriptProcessor = audioContext.createScriptProcessor(BUFFER_SIZE, 1, 1);
|
||||
source = audioContext.createMediaStreamSource(stream);
|
||||
source.connect(scriptProcessor);
|
||||
scriptProcessor.connect(audioContext.destination);
|
||||
|
||||
const audioBuffer = [];
|
||||
const skipRatio = Math.floor(audioContext.sampleRate / (SAMPLE_RATE * 2));
|
||||
|
||||
scriptProcessor.onaudioprocess = (event) => {
|
||||
const rawLeftChannelData = event.inputBuffer.getChannelData(0);
|
||||
for (let i = 0; i < rawLeftChannelData.length; i += skipRatio) {
|
||||
const normalized = ((rawLeftChannelData[i] * 32768.0) + 32768) % 65536 - 32768;
|
||||
const swappedBytes = ((normalized & 0xff) << 8) | ((normalized >> 8) & 0xff);
|
||||
audioBuffer.push(swappedBytes);
|
||||
}
|
||||
|
||||
if (audioBuffer.length >= MIN_AUDIO_SIZE) {
|
||||
const audioFrame = frame.create({ audio: { audio: audioBuffer.slice(0, MIN_AUDIO_SIZE) } });
|
||||
const encodedFrame = new Uint8Array(frame.encode(audioFrame).finish());
|
||||
ws.send(encodedFrame);
|
||||
audioBuffer.splice(0, MIN_AUDIO_SIZE);
|
||||
}
|
||||
};
|
||||
|
||||
initWebSocket();
|
||||
})
|
||||
.catch((error) => console.error('Error accessing microphone:', error));
|
||||
}
|
||||
|
||||
function stopAudio() {
|
||||
if (ws) {
|
||||
ws.close();
|
||||
scriptProcessor.disconnect();
|
||||
source.disconnect();
|
||||
ws = undefined;
|
||||
}
|
||||
}
|
||||
|
||||
document.getElementById('startAudioBtn').addEventListener('click', startAudio);
|
||||
document.getElementById('stopAudioBtn').addEventListener('click', stopAudio);
|
||||
</script>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
@@ -1,50 +0,0 @@
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import logging
|
||||
import os
|
||||
from pipecat.pipeline.frame_processor import FrameProcessor
|
||||
from pipecat.pipeline.frames import TextFrame, TranscriptionFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.services.elevenlabs_ai_services import ElevenLabsTTSService
|
||||
from pipecat.transports.websocket_transport import WebsocketTransport
|
||||
from pipecat.services.whisper_ai_services import WhisperSTTService
|
||||
|
||||
logging.basicConfig(format="%(levelno)s %(asctime)s %(message)s")
|
||||
logger = logging.getLogger("pipecat")
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
|
||||
class WhisperTranscriber(FrameProcessor):
|
||||
async def process_frame(self, frame):
|
||||
if isinstance(frame, TranscriptionFrame):
|
||||
print(f"Transcribed: {frame.text}")
|
||||
else:
|
||||
yield frame
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = WebsocketTransport(
|
||||
mic_enabled=True,
|
||||
speaker_enabled=True,
|
||||
)
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
)
|
||||
|
||||
pipeline = Pipeline([
|
||||
WhisperSTTService(),
|
||||
WhisperTranscriber(),
|
||||
tts,
|
||||
])
|
||||
|
||||
@transport.on_connection
|
||||
async def queue_frame():
|
||||
await pipeline.queue_frames([TextFrame("Hello there!")])
|
||||
|
||||
await transport.run(pipeline)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -1,3 +1,9 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
@@ -7,10 +13,11 @@ from PIL import Image
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
ImageRawFrame,
|
||||
OutputImageRawFrame,
|
||||
SpriteFrame,
|
||||
Frame,
|
||||
LLMMessagesFrame,
|
||||
AudioRawFrame,
|
||||
TTSAudioRawFrame,
|
||||
TTSStoppedFrame,
|
||||
TextFrame,
|
||||
UserImageRawFrame,
|
||||
@@ -25,17 +32,18 @@ from pipecat.processors.aggregators.llm_response import LLMUserResponseAggregato
|
||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.moondream import MoondreamService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVAD
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -53,7 +61,7 @@ for i in range(1, 26):
|
||||
# Get the filename without the extension to use as the dictionary key
|
||||
# Open the image and convert it to bytes
|
||||
with Image.open(full_path) as img:
|
||||
sprites.append(ImageRawFrame(image=img.tobytes(), size=img.size, format=img.format))
|
||||
sprites.append(OutputImageRawFrame(image=img.tobytes(), size=img.size, format=img.format))
|
||||
|
||||
flipped = sprites[::-1]
|
||||
sprites.extend(flipped)
|
||||
@@ -66,7 +74,7 @@ talking_frame = SpriteFrame(images=sprites)
|
||||
class TalkingAnimation(FrameProcessor):
|
||||
"""
|
||||
This class starts a talking animation when it receives an first AudioFrame,
|
||||
and then returns to a "quiet" sprite when it sees a LLMResponseEndFrame.
|
||||
and then returns to a "quiet" sprite when it sees a TTSStoppedFrame.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
@@ -74,7 +82,9 @@ class TalkingAnimation(FrameProcessor):
|
||||
self._is_talking = False
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
if isinstance(frame, AudioRawFrame):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, TTSAudioRawFrame):
|
||||
if not self._is_talking:
|
||||
await self.push_frame(talking_frame)
|
||||
self._is_talking = True
|
||||
@@ -93,9 +103,13 @@ class UserImageRequester(FrameProcessor):
|
||||
self.participant_id = participant_id
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if self.participant_id and isinstance(frame, TextFrame):
|
||||
if frame.text == user_request_answer:
|
||||
await self.push_frame(UserImageRequestFrame(self.participant_id), FrameDirection.UPSTREAM)
|
||||
await self.push_frame(
|
||||
UserImageRequestFrame(self.participant_id), FrameDirection.UPSTREAM
|
||||
)
|
||||
await self.push_frame(TextFrame("Describe the image in a short sentence."))
|
||||
elif isinstance(frame, UserImageRawFrame):
|
||||
await self.push_frame(frame)
|
||||
@@ -107,6 +121,8 @@ class TextFilterProcessor(FrameProcessor):
|
||||
self.text = text
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, TextFrame):
|
||||
if frame.text != self.text:
|
||||
await self.push_frame(frame)
|
||||
@@ -116,37 +132,37 @@ class TextFilterProcessor(FrameProcessor):
|
||||
|
||||
class ImageFilterProcessor(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if not isinstance(frame, ImageRawFrame):
|
||||
await self.push_frame(frame)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=576,
|
||||
transcription_enabled=True
|
||||
)
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
vad = SileroVAD()
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id="pNInz6obpgDQGcFmaJgB",
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4-turbo-preview")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
ta = TalkingAnimation()
|
||||
|
||||
@@ -163,17 +179,23 @@ async def main(room_url: str, token):
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": f"You are Chatbot, a friendly, helpful robot. Let the user know that you are capable of chatting or describing what you see. Your goal is to demonstrate your capabilities in a succinct way. Reply with only '{user_request_answer}' if the user asks you to describe what you see. Your output will be converted to audio so never include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by introducing yourself.",
|
||||
"content": f"You are Chatbot, a friendly, helpful robot. Let the user know that you are capable of chatting or describing what you see. Your goal is to demonstrate your capabilities in a succinct way. Reply with only '{user_request_answer}' if the user asks you to describe what you see. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by introducing yourself.",
|
||||
},
|
||||
]
|
||||
|
||||
ura = LLMUserResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([transport.input(), vad, ura, llm,
|
||||
ParallelPipeline(
|
||||
[sa, ir, va, moondream],
|
||||
[tf, imgf]),
|
||||
tts, ta, transport.output()])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
ura,
|
||||
llm,
|
||||
ParallelPipeline([sa, ir, va, moondream], [tf, imgf]),
|
||||
tts,
|
||||
ta,
|
||||
transport.output(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
await task.queue_frame(quiet_frame)
|
||||
@@ -191,5 +213,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
python-dotenv
|
||||
requests
|
||||
fastapi[all]
|
||||
uvicorn
|
||||
pipecat-ai[daily,moondream,openai,silero]
|
||||
pipecat-ai[daily,cartesia,moondream,openai,silero]
|
||||
|
||||
@@ -1,18 +1,21 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
|
||||
|
||||
|
||||
def configure():
|
||||
async def configure(aiohttp_session: aiohttp.ClientSession):
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
parser.add_argument(
|
||||
"-u",
|
||||
"--url",
|
||||
type=str,
|
||||
required=False,
|
||||
help="URL of the Daily room to join")
|
||||
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-k",
|
||||
"--apikey",
|
||||
@@ -28,31 +31,24 @@ def configure():
|
||||
|
||||
if not url:
|
||||
raise Exception(
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL.")
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
|
||||
)
|
||||
|
||||
if not key:
|
||||
raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
raise Exception(
|
||||
"No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
|
||||
)
|
||||
|
||||
daily_rest_helper = DailyRESTHelper(
|
||||
daily_api_key=key,
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
|
||||
# Create a meeting token for the given room with an expiration 1 hour in
|
||||
# the future.
|
||||
room_name: str = urllib.parse.urlparse(url).path[1:]
|
||||
expiration: float = time.time() + 60 * 60
|
||||
expiry_time: float = 60 * 60
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://api.daily.co/v1/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True,
|
||||
"exp": expiration}},
|
||||
)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
token = await daily_rest_helper.get_token(url, expiry_time)
|
||||
|
||||
return (url, token)
|
||||
|
||||
@@ -1,31 +1,52 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
import atexit
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse, RedirectResponse
|
||||
|
||||
from utils.daily_helpers import create_room as _create_room, get_token
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
|
||||
|
||||
MAX_BOTS_PER_ROOM = 1
|
||||
|
||||
# Bot sub-process dict for status reporting and concurrency control
|
||||
bot_procs = {}
|
||||
|
||||
daily_helpers = {}
|
||||
|
||||
|
||||
def cleanup():
|
||||
# Clean up function, just to be extra safe
|
||||
for proc in bot_procs.values():
|
||||
for entry in bot_procs.values():
|
||||
proc = entry[0]
|
||||
proc.terminate()
|
||||
proc.wait()
|
||||
|
||||
|
||||
atexit.register(cleanup)
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
aiohttp_session = aiohttp.ClientSession()
|
||||
daily_helpers["rest"] = DailyRESTHelper(
|
||||
daily_api_key=os.getenv("DAILY_API_KEY", ""),
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
yield
|
||||
await aiohttp_session.close()
|
||||
cleanup()
|
||||
|
||||
|
||||
app = FastAPI()
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
@@ -39,45 +60,42 @@ app.add_middleware(
|
||||
@app.get("/start")
|
||||
async def start_agent(request: Request):
|
||||
print(f"!!! Creating room")
|
||||
room_url, room_name = _create_room()
|
||||
print(f"!!! Room URL: {room_url}")
|
||||
room = await daily_helpers["rest"].create_room(DailyRoomParams())
|
||||
print(f"!!! Room URL: {room.url}")
|
||||
# Ensure the room property is present
|
||||
if not room_url:
|
||||
if not room.url:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Missing 'room' property in request data. Cannot start agent without a target room!")
|
||||
detail="Missing 'room' property in request data. Cannot start agent without a target room!",
|
||||
)
|
||||
|
||||
# Check if there is already an existing process running in this room
|
||||
num_bots_in_room = sum(
|
||||
1 for proc in bot_procs.values() if proc[1] == room_url and proc[0].poll() is None)
|
||||
1 for proc in bot_procs.values() if proc[1] == room.url and proc[0].poll() is None
|
||||
)
|
||||
if num_bots_in_room >= MAX_BOTS_PER_ROOM:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Max bot limited reach for room: {room_url}")
|
||||
raise HTTPException(status_code=500, detail=f"Max bot limited reach for room: {room.url}")
|
||||
|
||||
# Get the token for the room
|
||||
token = get_token(room_url)
|
||||
token = await daily_helpers["rest"].get_token(room.url)
|
||||
|
||||
if not token:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to get token for room: {room_url}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")
|
||||
|
||||
# Spawn a new agent, and join the user session
|
||||
# Note: this is mostly for demonstration purposes (refer to 'deployment' in README)
|
||||
try:
|
||||
proc = subprocess.Popen(
|
||||
[
|
||||
f"python3 -m bot -u {room_url} -t {token}"
|
||||
],
|
||||
[f"python3 -m bot -u {room.url} -t {token}"],
|
||||
shell=True,
|
||||
bufsize=1,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__))
|
||||
cwd=os.path.dirname(os.path.abspath(__file__)),
|
||||
)
|
||||
bot_procs[proc.pid] = (proc, room_url)
|
||||
bot_procs[proc.pid] = (proc, room.url)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
|
||||
return RedirectResponse(room_url)
|
||||
return RedirectResponse(room.url)
|
||||
|
||||
|
||||
@app.get("/status/{pid}")
|
||||
@@ -87,8 +105,7 @@ def get_status(pid: int):
|
||||
|
||||
# If the subprocess doesn't exist, return an error
|
||||
if not proc:
|
||||
raise HTTPException(
|
||||
status_code=404, detail=f"Bot with process id: {pid} not found")
|
||||
raise HTTPException(status_code=404, detail=f"Bot with process id: {pid} not found")
|
||||
|
||||
# Check the status of the subprocess
|
||||
if proc[0].poll() is None:
|
||||
@@ -105,14 +122,10 @@ if __name__ == "__main__":
|
||||
default_host = os.getenv("HOST", "0.0.0.0")
|
||||
default_port = int(os.getenv("FAST_API_PORT", "7860"))
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Daily Moondream FastAPI server")
|
||||
parser.add_argument("--host", type=str,
|
||||
default=default_host, help="Host address")
|
||||
parser.add_argument("--port", type=int,
|
||||
default=default_port, help="Port number")
|
||||
parser.add_argument("--reload", action="store_true",
|
||||
help="Reload code on change")
|
||||
parser = argparse.ArgumentParser(description="Daily Moondream FastAPI server")
|
||||
parser.add_argument("--host", type=str, default=default_host, help="Host address")
|
||||
parser.add_argument("--port", type=int, default=default_port, help="Port number")
|
||||
parser.add_argument("--reload", action="store_true", help="Reload code on change")
|
||||
|
||||
config = parser.parse_args()
|
||||
|
||||
|
||||
@@ -1,109 +0,0 @@
|
||||
|
||||
import urllib.parse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
|
||||
daily_api_path = os.getenv("DAILY_API_URL") or "api.daily.co/v1"
|
||||
daily_api_key = os.getenv("DAILY_API_KEY")
|
||||
|
||||
|
||||
def create_room() -> tuple[str, str]:
|
||||
"""
|
||||
Helper function to create a Daily room.
|
||||
# See: https://docs.daily.co/reference/rest-api/rooms
|
||||
|
||||
Returns:
|
||||
tuple: A tuple containing the room URL and room name.
|
||||
|
||||
Raises:
|
||||
Exception: If the request to create the room fails or if the response does not contain the room URL or room name.
|
||||
"""
|
||||
room_props = {
|
||||
"exp": time.time() + 60 * 60, # 1 hour
|
||||
"enable_chat": True,
|
||||
"enable_emoji_reactions": True,
|
||||
"eject_at_room_exp": True,
|
||||
"enable_prejoin_ui": False, # Important for the bot to be able to join headlessly
|
||||
}
|
||||
res = requests.post(
|
||||
f"https://{daily_api_path}/rooms",
|
||||
headers={"Authorization": f"Bearer {daily_api_key}"},
|
||||
json={
|
||||
"properties": room_props
|
||||
},
|
||||
)
|
||||
if res.status_code != 200:
|
||||
raise Exception(f"Unable to create room: {res.text}")
|
||||
|
||||
data = res.json()
|
||||
room_url: str = data.get("url")
|
||||
room_name: str = data.get("name")
|
||||
if room_url is None or room_name is None:
|
||||
raise Exception("Missing room URL or room name in response")
|
||||
|
||||
return room_url, room_name
|
||||
|
||||
|
||||
def get_name_from_url(room_url: str) -> str:
|
||||
"""
|
||||
Extracts the name from a given room URL.
|
||||
|
||||
Args:
|
||||
room_url (str): The URL of the room.
|
||||
|
||||
Returns:
|
||||
str: The extracted name from the room URL.
|
||||
"""
|
||||
return urllib.parse.urlparse(room_url).path[1:]
|
||||
|
||||
|
||||
def get_token(room_url: str) -> str:
|
||||
"""
|
||||
Retrieves a meeting token for the specified Daily room URL.
|
||||
# See: https://docs.daily.co/reference/rest-api/meeting-tokens
|
||||
|
||||
Args:
|
||||
room_url (str): The URL of the Daily room.
|
||||
|
||||
Returns:
|
||||
str: The meeting token.
|
||||
|
||||
Raises:
|
||||
Exception: If no room URL is specified or if no Daily API key is specified.
|
||||
Exception: If there is an error creating the meeting token.
|
||||
"""
|
||||
if not room_url:
|
||||
raise Exception(
|
||||
"No Daily room specified. You must specify a Daily room in order a token to be generated.")
|
||||
|
||||
if not daily_api_key:
|
||||
raise Exception(
|
||||
"No Daily API key specified. set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
|
||||
expiration: float = time.time() + 60 * 60
|
||||
room_name = get_name_from_url(room_url)
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://{daily_api_path}/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {daily_api_key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True, # Owner tokens required for transcription
|
||||
"exp": expiration}},
|
||||
)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
|
||||
return token
|
||||
16
examples/patient-intake/Dockerfile
Normal file
16
examples/patient-intake/Dockerfile
Normal file
@@ -0,0 +1,16 @@
|
||||
FROM python:3.10-bullseye
|
||||
|
||||
RUN mkdir /app
|
||||
RUN mkdir /app/assets
|
||||
RUN mkdir /app/utils
|
||||
COPY *.py /app/
|
||||
COPY requirements.txt /app/
|
||||
copy assets/* /app/assets/
|
||||
copy utils/* /app/utils/
|
||||
|
||||
WORKDIR /app
|
||||
RUN pip3 install -r requirements.txt
|
||||
|
||||
EXPOSE 7860
|
||||
|
||||
CMD ["python3", "server.py"]
|
||||
37
examples/patient-intake/README.md
Normal file
37
examples/patient-intake/README.md
Normal file
@@ -0,0 +1,37 @@
|
||||
# Simple Chatbot
|
||||
|
||||
<img src="image.png" width="420px">
|
||||
|
||||
This app connects you to a chatbot powered by GPT-4, complete with animations generated by Stable Video Diffusion.
|
||||
|
||||
See a video of it in action: https://x.com/kwindla/status/1778628911817183509
|
||||
|
||||
And a quick video walkthrough of the code: https://www.loom.com/share/13df1967161f4d24ade054e7f8753416
|
||||
|
||||
ℹ️ The first time, things might take extra time to get started since VAD (Voice Activity Detection) model needs to be downloaded.
|
||||
|
||||
## Get started
|
||||
|
||||
```python
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
|
||||
cp env.example .env # and add your credentials
|
||||
|
||||
```
|
||||
|
||||
## Run the server
|
||||
|
||||
```bash
|
||||
python server.py
|
||||
```
|
||||
|
||||
Then, visit `http://localhost:7860/start` in your browser to start a chatbot session.
|
||||
|
||||
## Build and test the Docker image
|
||||
|
||||
```
|
||||
docker build -t chatbot .
|
||||
docker run --env-file .env -p 7860:7860 chatbot
|
||||
```
|
||||
BIN
examples/patient-intake/assets/clack-short-quiet.wav
Normal file
BIN
examples/patient-intake/assets/clack-short-quiet.wav
Normal file
Binary file not shown.
BIN
examples/patient-intake/assets/clack-short.wav
Normal file
BIN
examples/patient-intake/assets/clack-short.wav
Normal file
Binary file not shown.
BIN
examples/patient-intake/assets/clack.wav
Normal file
BIN
examples/patient-intake/assets/clack.wav
Normal file
Binary file not shown.
BIN
examples/patient-intake/assets/ding.wav
Normal file
BIN
examples/patient-intake/assets/ding.wav
Normal file
Binary file not shown.
BIN
examples/patient-intake/assets/ding2.wav
Normal file
BIN
examples/patient-intake/assets/ding2.wav
Normal file
Binary file not shown.
BIN
examples/patient-intake/assets/ding3.wav
Normal file
BIN
examples/patient-intake/assets/ding3.wav
Normal file
Binary file not shown.
365
examples/patient-intake/bot.py
Normal file
365
examples/patient-intake/bot.py
Normal file
@@ -0,0 +1,365 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
import wave
|
||||
|
||||
from pipecat.frames.frames import OutputAudioRawFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.logger import FrameLogger
|
||||
from pipecat.processors.frame_processor import FrameDirection
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMContext, OpenAILLMContextFrame, OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
sounds = {}
|
||||
sound_files = [
|
||||
"clack-short.wav",
|
||||
"clack.wav",
|
||||
"clack-short-quiet.wav",
|
||||
"ding.wav",
|
||||
"ding2.wav",
|
||||
]
|
||||
|
||||
script_dir = os.path.dirname(__file__)
|
||||
|
||||
for file in sound_files:
|
||||
# Build the full path to the sound file
|
||||
full_path = os.path.join(script_dir, "assets", file)
|
||||
# Get the filename without the extension to use as the dictionary key
|
||||
filename = os.path.splitext(os.path.basename(full_path))[0]
|
||||
# Open the sound and convert it to bytes
|
||||
with wave.open(full_path) as audio_file:
|
||||
sounds[file] = OutputAudioRawFrame(
|
||||
audio_file.readframes(-1), audio_file.getframerate(), audio_file.getnchannels()
|
||||
)
|
||||
|
||||
|
||||
class IntakeProcessor:
|
||||
def __init__(self, context: OpenAILLMContext):
|
||||
print(f"Initializing context from IntakeProcessor")
|
||||
context.add_message(
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are Jessica, an agent for a company called Tri-County Health Services. Your job is to collect important information from the user before their doctor visit. You're talking to Chad Bailey. You should address the user by their first name and be polite and professional. You're not a medical professional, so you shouldn't provide any advice. Keep your responses short. Your job is to collect information to give to a doctor. Don't make assumptions about what values to plug into functions. Ask for clarification if a user response is ambiguous. Start by introducing yourself. Then, ask the user to confirm their identity by telling you their birthday, including the year. When they answer with their birthday, call the verify_birthday function.",
|
||||
}
|
||||
)
|
||||
context.set_tools(
|
||||
[
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "verify_birthday",
|
||||
"description": "Use this function to verify the user has provided their correct birthday.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"birthday": {
|
||||
"type": "string",
|
||||
"description": "The user's birthdate, including the year. The user can provide it in any format, but convert it to YYYY-MM-DD format to call this function.",
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
async def verify_birthday(
|
||||
self, function_name, tool_call_id, args, llm, context, result_callback
|
||||
):
|
||||
if args["birthday"] == "1983-01-01":
|
||||
context.set_tools(
|
||||
[
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "list_prescriptions",
|
||||
"description": "Once the user has provided a list of their prescription medications, call this function.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"prescriptions": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"medication": {
|
||||
"type": "string",
|
||||
"description": "The medication's name",
|
||||
},
|
||||
"dosage": {
|
||||
"type": "string",
|
||||
"description": "The prescription's dosage",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
]
|
||||
)
|
||||
# It's a bit weird to push this to the LLM, but it gets it into the pipeline
|
||||
# await llm.push_frame(sounds["ding2.wav"], FrameDirection.DOWNSTREAM)
|
||||
# We don't need the function call in the context, so just return a new
|
||||
# system message and let the framework re-prompt
|
||||
await result_callback(
|
||||
[
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Next, thank the user for confirming their identity, then ask the user to list their current prescriptions. Each prescription needs to have a medication name and a dosage. Do not call the list_prescriptions function with any unknown dosages.",
|
||||
}
|
||||
]
|
||||
)
|
||||
else:
|
||||
# The user provided an incorrect birthday; ask them to try again
|
||||
await result_callback(
|
||||
[
|
||||
{
|
||||
"role": "system",
|
||||
"content": "The user provided an incorrect birthday. Ask them for their birthday again. When they answer, call the verify_birthday function.",
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
async def start_prescriptions(self, function_name, llm, context):
|
||||
print(f"!!! doing start prescriptions")
|
||||
# Move on to allergies
|
||||
context.set_tools(
|
||||
[
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "list_allergies",
|
||||
"description": "Once the user has provided a list of their allergies, call this function.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"allergies": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "What the user is allergic to",
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
]
|
||||
)
|
||||
context.add_message(
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Next, ask the user if they have any allergies. Once they have listed their allergies or confirmed they don't have any, call the list_allergies function.",
|
||||
}
|
||||
)
|
||||
print(f"!!! about to await llm process frame in start prescrpitions")
|
||||
await llm.process_frame(OpenAILLMContextFrame(context), FrameDirection.DOWNSTREAM)
|
||||
print(f"!!! past await process frame in start prescriptions")
|
||||
|
||||
async def start_allergies(self, function_name, llm, context):
|
||||
print("!!! doing start allergies")
|
||||
# Move on to conditions
|
||||
context.set_tools(
|
||||
[
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "list_conditions",
|
||||
"description": "Once the user has provided a list of their medical conditions, call this function.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"conditions": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The user's medical condition",
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
]
|
||||
)
|
||||
context.add_message(
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Now ask the user if they have any medical conditions the doctor should know about. Once they've answered the question, call the list_conditions function.",
|
||||
}
|
||||
)
|
||||
await llm.process_frame(OpenAILLMContextFrame(context), FrameDirection.DOWNSTREAM)
|
||||
|
||||
async def start_conditions(self, function_name, llm, context):
|
||||
print("!!! doing start conditions")
|
||||
# Move on to visit reasons
|
||||
context.set_tools(
|
||||
[
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "list_visit_reasons",
|
||||
"description": "Once the user has provided a list of the reasons they are visiting a doctor today, call this function.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"visit_reasons": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The user's reason for visiting the doctor",
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
]
|
||||
)
|
||||
context.add_message(
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Finally, ask the user the reason for their doctor visit today. Once they answer, call the list_visit_reasons function.",
|
||||
}
|
||||
)
|
||||
await llm.process_frame(OpenAILLMContextFrame(context), FrameDirection.DOWNSTREAM)
|
||||
|
||||
async def start_visit_reasons(self, function_name, llm, context):
|
||||
print("!!! doing start visit reasons")
|
||||
# move to finish call
|
||||
context.set_tools([])
|
||||
context.add_message(
|
||||
{"role": "system", "content": "Now, thank the user and end the conversation."}
|
||||
)
|
||||
await llm.process_frame(OpenAILLMContextFrame(context), FrameDirection.DOWNSTREAM)
|
||||
|
||||
async def save_data(self, function_name, tool_call_id, args, llm, context, result_callback):
|
||||
logger.info(f"!!! Saving data: {args}")
|
||||
# Since this is supposed to be "async", returning None from the callback
|
||||
# will prevent adding anything to context or re-prompting
|
||||
await result_callback(None)
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
#
|
||||
# Spanish
|
||||
#
|
||||
# transcription_settings=DailyTranscriptionSettings(
|
||||
# language="es",
|
||||
# tier="nova",
|
||||
# model="2-general"
|
||||
# )
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
# tts = CartesiaTTSService(
|
||||
# api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
# voice_id="846d6cb0-2301-48b6-9683-48f5618ea2f6", # Spanish-speaking Lady
|
||||
# )
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = []
|
||||
context = OpenAILLMContext(messages=messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
intake = IntakeProcessor(context)
|
||||
llm.register_function("verify_birthday", intake.verify_birthday)
|
||||
llm.register_function(
|
||||
"list_prescriptions", intake.save_data, start_callback=intake.start_prescriptions
|
||||
)
|
||||
llm.register_function(
|
||||
"list_allergies", intake.save_data, start_callback=intake.start_allergies
|
||||
)
|
||||
llm.register_function(
|
||||
"list_conditions", intake.save_data, start_callback=intake.start_conditions
|
||||
)
|
||||
llm.register_function(
|
||||
"list_visit_reasons", intake.save_data, start_callback=intake.start_visit_reasons
|
||||
)
|
||||
|
||||
fl = FrameLogger("LLM Output")
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport input
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
fl, # Frame logger
|
||||
tts, # TTS
|
||||
transport.output(), # Transport output
|
||||
context_aggregator.assistant(), # Assistant responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=False))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
print(f"Context is: {context}")
|
||||
await task.queue_frames([OpenAILLMContextFrame(context)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
4
examples/patient-intake/env.example
Normal file
4
examples/patient-intake/env.example
Normal file
@@ -0,0 +1,4 @@
|
||||
DAILY_SAMPLE_ROOM_URL=https://yourdomain.daily.co/yourroom # (for joining the bot to the same room repeatedly for local dev)
|
||||
DAILY_API_KEY=7df...
|
||||
OPENAI_API_KEY=sk-PL...
|
||||
ELEVENLABS_API_KEY=aeb...
|
||||
BIN
examples/patient-intake/image.png
Normal file
BIN
examples/patient-intake/image.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 733 KiB |
4
examples/patient-intake/requirements.txt
Normal file
4
examples/patient-intake/requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
python-dotenv
|
||||
fastapi[all]
|
||||
uvicorn
|
||||
pipecat-ai[daily,cartesia,openai,silero]
|
||||
54
examples/patient-intake/runner.py
Normal file
54
examples/patient-intake/runner.py
Normal file
@@ -0,0 +1,54 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
|
||||
|
||||
|
||||
async def configure(aiohttp_session: aiohttp.ClientSession):
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
parser.add_argument(
|
||||
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-k",
|
||||
"--apikey",
|
||||
type=str,
|
||||
required=False,
|
||||
help="Daily API Key (needed to create an owner token for the room)",
|
||||
)
|
||||
|
||||
args, unknown = parser.parse_known_args()
|
||||
|
||||
url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL")
|
||||
key = args.apikey or os.getenv("DAILY_API_KEY")
|
||||
|
||||
if not url:
|
||||
raise Exception(
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
|
||||
)
|
||||
|
||||
if not key:
|
||||
raise Exception(
|
||||
"No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
|
||||
)
|
||||
|
||||
daily_rest_helper = DailyRESTHelper(
|
||||
daily_api_key=key,
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
|
||||
# Create a meeting token for the given room with an expiration 1 hour in
|
||||
# the future.
|
||||
expiry_time: float = 60 * 60
|
||||
|
||||
token = await daily_rest_helper.get_token(url, expiry_time)
|
||||
|
||||
return (url, token)
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user