Compare commits
733 Commits
update-mod
...
v0.0.70
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e1735a2da1 | ||
|
|
c101c9c8e1 | ||
|
|
96dc162de5 | ||
|
|
03eb22fe0a | ||
|
|
0bb61d72ab | ||
|
|
f758508a82 | ||
|
|
69d0218d7e | ||
|
|
eb5e5ab1df | ||
|
|
093697906c | ||
|
|
efe96b7ed1 | ||
|
|
7ecdd41ab9 | ||
|
|
aec70d61e9 | ||
|
|
2efac13344 | ||
|
|
15aeb11c36 | ||
|
|
e705f4d984 | ||
|
|
96fa62fdfe | ||
|
|
845c70797a | ||
|
|
16048956c3 | ||
|
|
cf2f4b5902 | ||
|
|
db46f33f34 | ||
|
|
25d1515daf | ||
|
|
a3469cd59f | ||
|
|
513ce26200 | ||
|
|
1cd96f94ff | ||
|
|
901dd041f0 | ||
|
|
a2ee94651e | ||
|
|
abdce063f1 | ||
|
|
a33ce5e4bf | ||
|
|
c9575eaef9 | ||
|
|
1e74476a71 | ||
|
|
82935884c4 | ||
|
|
d774a23768 | ||
|
|
e9f041e170 | ||
|
|
1f51b6e4f1 | ||
|
|
028650249c | ||
|
|
534197239f | ||
|
|
d2f4bb574c | ||
|
|
07fb1a2c39 | ||
|
|
581b800c43 | ||
|
|
30ca39287f | ||
|
|
01fa9698de | ||
|
|
10bd969636 | ||
|
|
f7761f2b61 | ||
|
|
49ff38a21f | ||
|
|
8d161306c7 | ||
|
|
027a82dff1 | ||
|
|
cb409d58e0 | ||
|
|
094e2f8151 | ||
|
|
71d121aeb9 | ||
|
|
b1a88af43c | ||
|
|
f73eb4ebd9 | ||
|
|
31ca9be299 | ||
|
|
1642c082d1 | ||
|
|
892d213442 | ||
|
|
fc24267e09 | ||
|
|
9b71bdc608 | ||
|
|
310be89895 | ||
|
|
71fbd57e12 | ||
|
|
ab4b48c823 | ||
|
|
532767cfa1 | ||
|
|
5512de3221 | ||
|
|
13546d5e8f | ||
|
|
c6f1aa8086 | ||
|
|
5606c47cb7 | ||
|
|
7f7cd96211 | ||
|
|
b828bfd890 | ||
|
|
31d084eb78 | ||
|
|
ab18b280e9 | ||
|
|
24e89c4081 | ||
|
|
e129390f56 | ||
|
|
4d7c87bb4c | ||
|
|
dac3f82a75 | ||
|
|
fd860921f1 | ||
|
|
0482ccd48b | ||
|
|
b8b1990617 | ||
|
|
70951b1198 | ||
|
|
6d24514ace | ||
|
|
49915ceb84 | ||
|
|
925b13e337 | ||
|
|
ef3143d558 | ||
|
|
ed84637b55 | ||
|
|
897a944478 | ||
|
|
d86343c38d | ||
|
|
297afdd126 | ||
|
|
f0cbdc4e68 | ||
|
|
40b52cadde | ||
|
|
04bf85ddfe | ||
|
|
4809684a13 | ||
|
|
1eb50ad88f | ||
|
|
52569bcdb2 | ||
|
|
a50a407415 | ||
|
|
9f223442c2 | ||
|
|
c647114bb9 | ||
|
|
43719ec737 | ||
|
|
8602557985 | ||
|
|
dd1f7d0875 | ||
|
|
ec39e794d3 | ||
|
|
7b1a937d4c | ||
|
|
0fd38d8115 | ||
|
|
7a4efc6212 | ||
|
|
2eb2c5a413 | ||
|
|
2fcfb0aa9f | ||
|
|
f1df079512 | ||
|
|
d77bedbafb | ||
|
|
b34c593c54 | ||
|
|
62efbc3342 | ||
|
|
2d609a0bde | ||
|
|
6bc4b4a17f | ||
|
|
b489e52080 | ||
|
|
a8aaeec52b | ||
|
|
ad7eec181e | ||
|
|
b33897ffb9 | ||
|
|
1c3d3f2f4b | ||
|
|
9a5a1edb6b | ||
|
|
f2eb869b02 | ||
|
|
0c7e3cfcb2 | ||
|
|
24e19db29e | ||
|
|
bc6d7b7bbd | ||
|
|
cad271068e | ||
|
|
3425293115 | ||
|
|
20dbfec3a9 | ||
|
|
170057a75a | ||
|
|
b86b761e0b | ||
|
|
da0d2f0266 | ||
|
|
321ea27c34 | ||
|
|
b712e6b9aa | ||
|
|
b3652e6527 | ||
|
|
bc97f397ef | ||
|
|
e5da3f6e68 | ||
|
|
8400539acf | ||
|
|
b5eac8dfed | ||
|
|
ba312b5591 | ||
|
|
f23572b318 | ||
|
|
db838634e7 | ||
|
|
7f2e848a5c | ||
|
|
096e854d50 | ||
|
|
3ffe8b3155 | ||
|
|
a471f49b61 | ||
|
|
4d2a02f318 | ||
|
|
0bec7db03b | ||
|
|
74827f983f | ||
|
|
0ed46f457e | ||
|
|
36b731be73 | ||
|
|
62fbdd4e81 | ||
|
|
ca7b0650c2 | ||
|
|
67dd146038 | ||
|
|
fb66df2efd | ||
|
|
2395ca0057 | ||
|
|
d203789490 | ||
|
|
7ea0e31cd4 | ||
|
|
d3bf13a503 | ||
|
|
ea91970499 | ||
|
|
803b3f2cc4 | ||
|
|
1788ba6c5c | ||
|
|
5209bd3d9f | ||
|
|
cb9178f1ec | ||
|
|
5676920a6a | ||
|
|
513221d9fd | ||
|
|
a33d0b4b53 | ||
|
|
bee242b781 | ||
|
|
fa1c98ff29 | ||
|
|
ae3a7d9bed | ||
|
|
0c2efb312c | ||
|
|
cf8eeaab0b | ||
|
|
2f8cb3ce76 | ||
|
|
821da723c0 | ||
|
|
575b97ba60 | ||
|
|
cc0819b709 | ||
|
|
318d6f042b | ||
|
|
05ae3a1703 | ||
|
|
8e54805e62 | ||
|
|
64399a72f3 | ||
|
|
6c33f0b0bd | ||
|
|
aca304b395 | ||
|
|
db5c9e67be | ||
|
|
2313cec792 | ||
|
|
83acaf692a | ||
|
|
e9aeb2662b | ||
|
|
356f4039e4 | ||
|
|
736c7f1f30 | ||
|
|
2994448036 | ||
|
|
d476d9ea05 | ||
|
|
bf31bce440 | ||
|
|
6393e89022 | ||
|
|
884268fce3 | ||
|
|
071a9307c9 | ||
|
|
2cdfaa0a82 | ||
|
|
ecf878e14d | ||
|
|
4eed335bc7 | ||
|
|
2e57bb74d2 | ||
|
|
0a39769cd0 | ||
|
|
bdb6a9e5d1 | ||
|
|
f88e0eb96d | ||
|
|
0099f60d29 | ||
|
|
eaf9f20c56 | ||
|
|
e987c4741a | ||
|
|
6242278abd | ||
|
|
30850a431a | ||
|
|
1e7407c042 | ||
|
|
6d94f31ff2 | ||
|
|
ebb3d1cfd3 | ||
|
|
acce9489d7 | ||
|
|
3d442620f9 | ||
|
|
f1d7eb8565 | ||
|
|
798b935ff6 | ||
|
|
3039a1444e | ||
|
|
aa7d15beb3 | ||
|
|
2b3d2cb342 | ||
|
|
5a58357429 | ||
|
|
366add2536 | ||
|
|
e13c9fd42e | ||
|
|
2a6c01f634 | ||
|
|
bf29722e78 | ||
|
|
db227ad15f | ||
|
|
514716042b | ||
|
|
7a767e680c | ||
|
|
320b52eb1e | ||
|
|
428cee75c5 | ||
|
|
5479a55b2c | ||
|
|
d1f2a5d04f | ||
|
|
09ba319f3e | ||
|
|
6f524fb816 | ||
|
|
d3e2a9e5c0 | ||
|
|
b4cd7d7941 | ||
|
|
cd03b91115 | ||
|
|
f86d002ceb | ||
|
|
940926b5ec | ||
|
|
85c096df0b | ||
|
|
76d93522ac | ||
|
|
31492831cc | ||
|
|
8221dd594e | ||
|
|
6346ca1a84 | ||
|
|
4a3404883f | ||
|
|
1ebca35313 | ||
|
|
e0d1381f87 | ||
|
|
86e6841569 | ||
|
|
28b7a92a00 | ||
|
|
4db5b18694 | ||
|
|
a628e921c0 | ||
|
|
6ca6ff37c9 | ||
|
|
456db3710a | ||
|
|
50f024c6f9 | ||
|
|
a4de75a8c0 | ||
|
|
88e8fcdaca | ||
|
|
bfe9952c9a | ||
|
|
7f568e3e7e | ||
|
|
9b8800ac1d | ||
|
|
fd53712567 | ||
|
|
7f74c2465c | ||
|
|
30d67a78eb | ||
|
|
c3cfd1f0ce | ||
|
|
69ac70eed8 | ||
|
|
fcf49e79cc | ||
|
|
8d4894846d | ||
|
|
a809b710c5 | ||
|
|
f6289e9db2 | ||
|
|
26b4c4df22 | ||
|
|
f3a9844295 | ||
|
|
692821bdae | ||
|
|
ee143d5b3a | ||
|
|
7e178a634a | ||
|
|
fe88a3d80b | ||
|
|
a196eac290 | ||
|
|
3c819955a2 | ||
|
|
ca0d7bbbed | ||
|
|
f93bd1e817 | ||
|
|
415bc6ca0a | ||
|
|
8543c8d11d | ||
|
|
bf5ad64575 | ||
|
|
d42d02d809 | ||
|
|
0718f79ff2 | ||
|
|
9bbce225ce | ||
|
|
fb35fd6d71 | ||
|
|
b4fd92aed6 | ||
|
|
36931825b3 | ||
|
|
ca35299dcd | ||
|
|
e74b900914 | ||
|
|
25115668a7 | ||
|
|
fb94db3e64 | ||
|
|
c4778e770e | ||
|
|
3860cdf97b | ||
|
|
f3aec0c4ac | ||
|
|
d333094149 | ||
|
|
609ff4e66c | ||
|
|
cbccbcd9e7 | ||
|
|
54b1d7fcc1 | ||
|
|
54388c0d9b | ||
|
|
228c866aaa | ||
|
|
a09bd648af | ||
|
|
3e4ae61c75 | ||
|
|
7655c432c2 | ||
|
|
25dd651757 | ||
|
|
462aecea3e | ||
|
|
5f37df790b | ||
|
|
8e4e03541c | ||
|
|
c1252fc7eb | ||
|
|
ed1077cc9a | ||
|
|
4c761a7b22 | ||
|
|
9bc3df7803 | ||
|
|
5e5060a6fe | ||
|
|
2b66eddaa1 | ||
|
|
916b9d6c6d | ||
|
|
bd09ccd608 | ||
|
|
682f8e4d45 | ||
|
|
c9d0af9ee0 | ||
|
|
e1299d59bf | ||
|
|
61da6437ea | ||
|
|
798705469b | ||
|
|
459a753de3 | ||
|
|
1092ce70b3 | ||
|
|
9511c189bd | ||
|
|
66fea9e2ee | ||
|
|
69ae83516e | ||
|
|
144ea36c81 | ||
|
|
7a8ab9a900 | ||
|
|
c4b35055b4 | ||
|
|
a4c04e7c17 | ||
|
|
a6f7e7fc30 | ||
|
|
d5ebc883b3 | ||
|
|
deb43df0a4 | ||
|
|
88e472b3f1 | ||
|
|
f59fb8167d | ||
|
|
fac6f526f7 | ||
|
|
2f78d74ce6 | ||
|
|
d3942dda52 | ||
|
|
c00e9a8d3a | ||
|
|
c3b95767f3 | ||
|
|
90f27a3090 | ||
|
|
b6f09defc9 | ||
|
|
172813bcfb | ||
|
|
95c25efab7 | ||
|
|
a51af35024 | ||
|
|
119fd5ba7d | ||
|
|
0718a812bd | ||
|
|
3814501b48 | ||
|
|
7a5205dbda | ||
|
|
15a5028d23 | ||
|
|
fee2648ac0 | ||
|
|
04c02c9a20 | ||
|
|
0ff7195a83 | ||
|
|
3b91aa013a | ||
|
|
50f6235edb | ||
|
|
6f4d94f91b | ||
|
|
83a4c7d443 | ||
|
|
8171fec925 | ||
|
|
175f352ea7 | ||
|
|
5290161ac4 | ||
|
|
8762019ed7 | ||
|
|
61a59fa158 | ||
|
|
55eea20c8e | ||
|
|
9a621f0c54 | ||
|
|
55fc24e933 | ||
|
|
b14608f09b | ||
|
|
4a25c57337 | ||
|
|
f800e35ccb | ||
|
|
12d49a9b9d | ||
|
|
b25b251a44 | ||
|
|
64b2a75a94 | ||
|
|
b33a60f3a5 | ||
|
|
d22dbb1a6d | ||
|
|
983199a6cd | ||
|
|
133d7ee33a | ||
|
|
0bd888afc7 | ||
|
|
537bd1c58d | ||
|
|
5ef519fe2c | ||
|
|
20498fb47f | ||
|
|
b57dfb3b5d | ||
|
|
0355ed4aa1 | ||
|
|
1e76cc7bdc | ||
|
|
18c0374126 | ||
|
|
7072fba7e7 | ||
|
|
3d702a5c39 | ||
|
|
f31efa42c9 | ||
|
|
74b369ff20 | ||
|
|
46eed0a59a | ||
|
|
9643296e29 | ||
|
|
c83c5b5a34 | ||
|
|
277e2d7fc0 | ||
|
|
7280e390d9 | ||
|
|
4efc3f0a39 | ||
|
|
cb7e7a8aa3 | ||
|
|
9136402846 | ||
|
|
260fc76137 | ||
|
|
7cfb9a4d15 | ||
|
|
2089e0c974 | ||
|
|
9e0b4fe5d1 | ||
|
|
75ce632f84 | ||
|
|
efeb96c4e8 | ||
|
|
fb5438e9c2 | ||
|
|
7da9f66e1c | ||
|
|
9e16e3d614 | ||
|
|
84d040c6d0 | ||
|
|
f3e0beb8f1 | ||
|
|
e00a1196ef | ||
|
|
3867c0f8e7 | ||
|
|
cdf0953722 | ||
|
|
ed00f7d071 | ||
|
|
a3038afa02 | ||
|
|
f9ca0b8cc6 | ||
|
|
2920aa5af4 | ||
|
|
93c9cc4a0e | ||
|
|
b53f9235e4 | ||
|
|
1491462d15 | ||
|
|
c78f779800 | ||
|
|
b013e375fb | ||
|
|
52036138c1 | ||
|
|
4ba9a42861 | ||
|
|
27bff7a759 | ||
|
|
896f8d85f7 | ||
|
|
ed06cdd2c7 | ||
|
|
8473647269 | ||
|
|
5579145a06 | ||
|
|
35848d10b3 | ||
|
|
c7e223e85a | ||
|
|
885b2d1d2f | ||
|
|
73020be511 | ||
|
|
d388c057c0 | ||
|
|
c4d0f91a7f | ||
|
|
467233be04 | ||
|
|
2b02d08f4c | ||
|
|
9fe265ea64 | ||
|
|
cc1f4ba81c | ||
|
|
3784bdbd27 | ||
|
|
4ffdc3b77c | ||
|
|
38c9fa681a | ||
|
|
c477039954 | ||
|
|
d6ef3d64ac | ||
|
|
6938152db6 | ||
|
|
2154db07f0 | ||
|
|
5e0803479e | ||
|
|
3960c604a4 | ||
|
|
394648f1c9 | ||
|
|
da5c4953d5 | ||
|
|
2b7e1cb5b1 | ||
|
|
f182eafb40 | ||
|
|
9f7f42e885 | ||
|
|
9b8bce1914 | ||
|
|
96d05e12fc | ||
|
|
68c1069548 | ||
|
|
5b64613f65 | ||
|
|
1f9baefba8 | ||
|
|
0c255d2618 | ||
|
|
a38206de9c | ||
|
|
260f7c9b85 | ||
|
|
de294caed9 | ||
|
|
e40aa4f99a | ||
|
|
b1d413b9be | ||
|
|
8cbad070ad | ||
|
|
13569a5a5a | ||
|
|
d789334a60 | ||
|
|
7668b27fc0 | ||
|
|
6d30f441e8 | ||
|
|
a9e395b366 | ||
|
|
5e5626f04f | ||
|
|
d80aa5b44e | ||
|
|
80ef6dc4de | ||
|
|
458549f7df | ||
|
|
a8405649d0 | ||
|
|
ce1a72850b | ||
|
|
58de381746 | ||
|
|
bed2e894a2 | ||
|
|
b4de98cfb7 | ||
|
|
a4b9db9e07 | ||
|
|
664111a3c9 | ||
|
|
aa964847f3 | ||
|
|
fa5cac7e0a | ||
|
|
b2b01861b2 | ||
|
|
f014f718eb | ||
|
|
05ae8d3ffa | ||
|
|
88c9e08bd8 | ||
|
|
844f61dfea | ||
|
|
acb7d597cb | ||
|
|
2b18f60261 | ||
|
|
5b66133a6c | ||
|
|
0c5bc6a57a | ||
|
|
7981e00955 | ||
|
|
5e39c0cfeb | ||
|
|
a444701929 | ||
|
|
f6c1eb5d9d | ||
|
|
a1d46cb26b | ||
|
|
99ab148d88 | ||
|
|
d69fa5dba5 | ||
|
|
0d30b000af | ||
|
|
e7c0e742d2 | ||
|
|
2aff2dcca3 | ||
|
|
288f8865c8 | ||
|
|
8691870bcb | ||
|
|
e06146c237 | ||
|
|
c68e990cda | ||
|
|
4583905313 | ||
|
|
9cc498b1fa | ||
|
|
b3c5dc4045 | ||
|
|
56ca7360ae | ||
|
|
d5ab3251f0 | ||
|
|
915c284420 | ||
|
|
3824da7261 | ||
|
|
40154824e8 | ||
|
|
855d567b1e | ||
|
|
b323a7bd88 | ||
|
|
fa011d0018 | ||
|
|
e15fa8777a | ||
|
|
2143a6d927 | ||
|
|
044e2d3e73 | ||
|
|
be112ec63f | ||
|
|
d2f56c4e8f | ||
|
|
ddc6a9c695 | ||
|
|
2bebdbc371 | ||
|
|
8b9f1f0608 | ||
|
|
b25f3b2ed2 | ||
|
|
a995cf81b6 | ||
|
|
75d261639f | ||
|
|
f720d795d0 | ||
|
|
f6fe83e358 | ||
|
|
0513d0b6a8 | ||
|
|
0679bb217d | ||
|
|
38bd55e518 | ||
|
|
65c7423280 | ||
|
|
f24a85cc94 | ||
|
|
53887b7c98 | ||
|
|
523c012c38 | ||
|
|
97c28989c1 | ||
|
|
c19be6ebb2 | ||
|
|
54971a0735 | ||
|
|
4513e81e13 | ||
|
|
872204b795 | ||
|
|
a94cbfe6f5 | ||
|
|
7152faafb2 | ||
|
|
e6aadaccd8 | ||
|
|
3a73aa71b8 | ||
|
|
814e7509e1 | ||
|
|
e0cf5ec016 | ||
|
|
667bd32e6a | ||
|
|
b2ecd83706 | ||
|
|
b2754117c8 | ||
|
|
6c428c303b | ||
|
|
e7d889a143 | ||
|
|
da60e7069b | ||
|
|
c14406a3b9 | ||
|
|
725ab5ec21 | ||
|
|
daf9d47e58 | ||
|
|
63a65627a2 | ||
|
|
02c07755b0 | ||
|
|
15cbd18acc | ||
|
|
93c40b87dc | ||
|
|
eeaa9f67a1 | ||
|
|
b60691c7b2 | ||
|
|
2bb1b0b343 | ||
|
|
047ef9f86c | ||
|
|
9a2c603c91 | ||
|
|
94c4169407 | ||
|
|
cb8a551db8 | ||
|
|
779f09af70 | ||
|
|
19dc0f2bfb | ||
|
|
f0709e22ba | ||
|
|
8250736f5e | ||
|
|
83348a9f93 | ||
|
|
96d40903a9 | ||
|
|
2560811805 | ||
|
|
2b8c44c008 | ||
|
|
38e2d37674 | ||
|
|
6278561f88 | ||
|
|
750e79c1ce | ||
|
|
71eb2963c5 | ||
|
|
f44e2c86ea | ||
|
|
afe1f0df8c | ||
|
|
458fddfb48 | ||
|
|
8d915c5ccb | ||
|
|
304153dd03 | ||
|
|
a6781b7352 | ||
|
|
5ad0058303 | ||
|
|
75c039de33 | ||
|
|
74e3c3677e | ||
|
|
dc20327f10 | ||
|
|
e738affd29 | ||
|
|
ef3d732607 | ||
|
|
6d63cff1bf | ||
|
|
12f42605a1 | ||
|
|
fac3337927 | ||
|
|
76d198151c | ||
|
|
6a907058de | ||
|
|
6e1f531f64 | ||
|
|
4232cca5b6 | ||
|
|
a6a4d3d71f | ||
|
|
c52de0f5de | ||
|
|
a1e1255f16 | ||
|
|
c4f758725e | ||
|
|
7bc9a78ce6 | ||
|
|
f8be71b32c | ||
|
|
957fa5546d | ||
|
|
039cb8fcae | ||
|
|
8e05f2f1a1 | ||
|
|
8467aa1ed3 | ||
|
|
9c5878af3d | ||
|
|
ef29800fe9 | ||
|
|
7e09933070 | ||
|
|
82a9d7f992 | ||
|
|
facbebb15f | ||
|
|
2ba60fc41f | ||
|
|
685f951ae2 | ||
|
|
27d4c927a8 | ||
|
|
20a59e8c56 | ||
|
|
d9a0a93667 | ||
|
|
154d5d1859 | ||
|
|
a192217256 | ||
|
|
10cdc47e05 | ||
|
|
2b4d41a548 | ||
|
|
962f8062a5 | ||
|
|
d80d385b2f | ||
|
|
b347ca472f | ||
|
|
c3c4952abf | ||
|
|
f369ab4c1a | ||
|
|
62b41c6789 | ||
|
|
2872bc7902 | ||
|
|
9658b75a10 | ||
|
|
63de9039e6 | ||
|
|
9352396d7e | ||
|
|
d1ab1d38b7 | ||
|
|
080f70d91c | ||
|
|
ebed1fc6ea | ||
|
|
6821b1cdab | ||
|
|
144ae9b611 | ||
|
|
a2e7331ce2 | ||
|
|
8accd3e387 | ||
|
|
3d05a74dc0 | ||
|
|
e3c965f4d5 | ||
|
|
5354e5d891 | ||
|
|
5784e91cff | ||
|
|
bc5f098aaa | ||
|
|
93534b4692 | ||
|
|
b23d54c609 | ||
|
|
aa23a7b1e6 | ||
|
|
c0c41789ab | ||
|
|
cf2f249f8a | ||
|
|
029ef4f8c2 | ||
|
|
9cad6dfce9 | ||
|
|
4df6444832 | ||
|
|
944bc23135 | ||
|
|
1d863ee7de | ||
|
|
9ca775d1ab | ||
|
|
03002ad685 | ||
|
|
99a4154cbc | ||
|
|
0f68cc182d | ||
|
|
3ac50b9902 | ||
|
|
9557705b53 | ||
|
|
a7718926e9 | ||
|
|
dfa10af6ed | ||
|
|
8485ea6c5e | ||
|
|
b298376766 | ||
|
|
7cfefe4f84 | ||
|
|
71c7373987 | ||
|
|
d1086914fe | ||
|
|
2fb85941d3 | ||
|
|
be8788e4da | ||
|
|
acb6abd761 | ||
|
|
a528aad957 | ||
|
|
5c13252801 | ||
|
|
a4422ac6c2 | ||
|
|
985a031353 | ||
|
|
5c4079b286 | ||
|
|
5489ac5a73 | ||
|
|
49fbcc86ac | ||
|
|
d20c3307b9 | ||
|
|
9fd76923fd | ||
|
|
a753a623d4 | ||
|
|
4ee6c4b59e | ||
|
|
e79a002e5a | ||
|
|
420912dd4b | ||
|
|
de7185e8db | ||
|
|
8bfcfe8b1d | ||
|
|
26d2ce5926 | ||
|
|
9ee56bff9e | ||
|
|
2e0d77e4f0 | ||
|
|
4cc8a4312c | ||
|
|
cb7cb381aa | ||
|
|
b29ffeef29 | ||
|
|
b7b2a5b7a1 | ||
|
|
3384598e07 | ||
|
|
c420dbe57f | ||
|
|
fa8aafc7a5 | ||
|
|
4b364dda29 | ||
|
|
6bb765e40f | ||
|
|
c80d09f66c | ||
|
|
f8ff10c5d5 | ||
|
|
09ff836ef6 | ||
|
|
e446ecac14 | ||
|
|
8c0c8a6153 | ||
|
|
70033ae00b | ||
|
|
ac9dce63ae | ||
|
|
8b2df48fab | ||
|
|
156a5690fc | ||
|
|
d42c618398 | ||
|
|
b23ca5a4a8 | ||
|
|
63a6697a90 | ||
|
|
f1e45d0f02 | ||
|
|
4ad227ca2d | ||
|
|
66cc18194b | ||
|
|
7d65132c93 | ||
|
|
db7d7a4204 | ||
|
|
7bbac11084 | ||
|
|
76c8322b57 | ||
|
|
7b1cd3523d | ||
|
|
6bd821ac9a | ||
|
|
a6d51c343e | ||
|
|
1a5cf7a521 | ||
|
|
69491417ec | ||
|
|
b91780ced2 | ||
|
|
8ded666958 | ||
|
|
2490c804a5 | ||
|
|
dd8856a673 | ||
|
|
e7da08dab1 | ||
|
|
ae60d42016 | ||
|
|
50e8d82ece | ||
|
|
cc9901a82f | ||
|
|
1fd43e8a3f | ||
|
|
fdc508a1a5 | ||
|
|
37269db247 | ||
|
|
51269aabbd | ||
|
|
74ecc19e09 | ||
|
|
c6d48c16df | ||
|
|
873d84aa09 | ||
|
|
7360866c97 | ||
|
|
81f4768661 | ||
|
|
972d65f61b | ||
|
|
1da9d398e3 | ||
|
|
7358bc6428 | ||
|
|
a6af499f84 | ||
|
|
a9b551d73e | ||
|
|
c510870736 | ||
|
|
e8783f6a33 | ||
|
|
8cda4512ad | ||
|
|
fc90bdc638 | ||
|
|
5a88165a26 | ||
|
|
3466842cd4 |
18
.github/workflows/android.yaml
vendored
@@ -6,11 +6,13 @@ on:
|
||||
- main
|
||||
paths:
|
||||
- "examples/simple-chatbot/client/android/**"
|
||||
- "examples/p2p-webrtc/video-transform/client/android/**"
|
||||
pull_request:
|
||||
branches:
|
||||
- "**"
|
||||
paths:
|
||||
- "examples/simple-chatbot/client/android/**"
|
||||
- "examples/p2p-webrtc/video-transform/client/android/**"
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
sdk_git_ref:
|
||||
@@ -23,7 +25,7 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
sdk:
|
||||
name: "Simple chatbot demo"
|
||||
name: "Demo apps"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
@@ -37,12 +39,22 @@ jobs:
|
||||
distribution: 'temurin'
|
||||
java-version: '17'
|
||||
|
||||
- name: Build demo app
|
||||
- name: "Example app: Simple Chatbot"
|
||||
working-directory: examples/simple-chatbot/client/android
|
||||
run: ./gradlew :simple-chatbot-client:assembleDebug
|
||||
|
||||
- name: Upload demo APK
|
||||
- name: Upload Simple Chatbot APK
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: Simple Chatbot Android Client
|
||||
path: examples/simple-chatbot/client/android/simple-chatbot-client/build/outputs/apk/debug/simple-chatbot-client-debug.apk
|
||||
|
||||
- name: "Example app: Small WebRTC Client"
|
||||
working-directory: examples/p2p-webrtc/video-transform/client/android
|
||||
run: ./gradlew :small-webrtc-client:assembleDebug
|
||||
|
||||
- name: Upload Small WebRTC APK
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: Small WebRTC Android Client
|
||||
path: examples/p2p-webrtc/video-transform/client/android/small-webrtc-client/build/outputs/apk/debug/small-webrtc-client-debug.apk
|
||||
|
||||
30
.gitignore
vendored
@@ -7,7 +7,7 @@ venv
|
||||
/.idea
|
||||
#*#
|
||||
|
||||
# Distribution / packaging
|
||||
# Distribution / Packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
@@ -30,24 +30,24 @@ MANIFEST
|
||||
.env
|
||||
fly.toml
|
||||
|
||||
# Example files
|
||||
pipecat/examples/twilio-chatbot/templates/streams.xml
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/node_modules/
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/.expo/
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/dist/
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/npm-debug.*
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.jks
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.p8
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.p12
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.key
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.mobileprovision
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.orig.*
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/web-build/
|
||||
# Examples
|
||||
examples/telnyx-chatbot/templates/streams.xml
|
||||
examples/twilio-chatbot/templates/streams.xml
|
||||
examples/**/node_modules/
|
||||
examples/**/.expo/
|
||||
examples/**/dist/
|
||||
examples/**/npm-debug.*
|
||||
examples/**/*.jks
|
||||
examples/**/*.p8
|
||||
examples/**/*.p12
|
||||
examples/**/*.key
|
||||
examples/**/*.mobileprovision
|
||||
examples/**/*.orig.*
|
||||
examples/**/web-build/
|
||||
|
||||
# macOS
|
||||
.DS_Store
|
||||
|
||||
|
||||
# Documentation
|
||||
docs/api/_build/
|
||||
docs/api/api
|
||||
645
CHANGELOG.md
@@ -5,10 +5,636 @@ All notable changes to **Pipecat** will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
## [0.0.70]
|
||||
|
||||
### Added
|
||||
|
||||
- Added `ExotelFrameSerializer` to handle telephony calls via Exotel.
|
||||
|
||||
- Added the option `informal` to `TranslationConfig` on Gladia config.
|
||||
This allows forcing informal language forms when available.
|
||||
|
||||
- Added `CartesiaSTTService` which is a websocket based implementation to
|
||||
transcribe audio. Added a foundational example in
|
||||
`13f-cartesia-transcription.py`
|
||||
|
||||
- Added a `websocket` example, showing how to use the new Pipecat client
|
||||
`WebsocketTransport` to connect with Pipecat `FastAPIWebsocketTransport` or
|
||||
`WebsocketServerTransport`.
|
||||
|
||||
- Added language support to `RimeHttpTTSService`. Extended languages to include
|
||||
German and French for both `RimeTTSService` and `RimeHttpTTSService`.
|
||||
|
||||
### Changed
|
||||
|
||||
- Upgraded `daily-python` to 0.19.2.
|
||||
|
||||
- Make `PipelineTask.add_observer()` synchronous. This allows callers to call it
|
||||
before doing the work of running the `PipelineTask` (i.e. without invoking
|
||||
`PipelineTask.set_event_loop()` first).
|
||||
|
||||
- Pipecat 0.0.69 forced `uvloop` event loop on Linux and macOS. Unfortunately,
|
||||
this is causing issues on some systems. So, `uvloop` is not enabled by default
|
||||
anymore. If you want to use `uvloop` you can just set the `asyncio` event
|
||||
policy before starting your agent with:
|
||||
|
||||
```python
|
||||
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
|
||||
```
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue with various TTS services that would cause audio glitches at
|
||||
the start of every bot turn.
|
||||
|
||||
- Fixed an `ElevenLabsTTSService` issue where a context warning was printed
|
||||
when pushing a `TTSSpeakFrame`.
|
||||
|
||||
- Fixed an `AssemblyAISTTService` issue that could cause unexpected behavior
|
||||
when yielding empty `Frame()`s.
|
||||
|
||||
- Fixed an issue where `OutputAudioRawFrame.transport_destination` was being
|
||||
reset to `None` instead of retaining its intended value before sending the
|
||||
audio frame to `write_audio_frame`.
|
||||
|
||||
- Fixed a typo in Livekit transport that prevented initialization.
|
||||
|
||||
## [0.0.69] - 2025-06-02 "AI Engineer World's Fair release" ✨
|
||||
|
||||
### Added
|
||||
|
||||
- Added a new frame `FunctionCallsStartedFrame`. This frame is pushed both
|
||||
upstream and downstream from the LLM service to indicate that one or more
|
||||
function calls are going to be executed.
|
||||
|
||||
- Added LLM services `on_function_calls_started` event. This event will be
|
||||
triggered when the LLM service receives function calls from the model and is
|
||||
going to start executing them.
|
||||
|
||||
- Function calls can now be executed sequentially (in the order received in the
|
||||
completion) by passing `run_in_parallel=False` when creating your LLM
|
||||
service. By default, if the LLM completion returns 2 or more function calls
|
||||
they run concurrently. In both cases, concurrently and sequentially, a new LLM
|
||||
completion will run when the last function call finishes.
|
||||
|
||||
- Added OpenTelemetry tracing for `GeminiMultimodalLiveLLMService` and
|
||||
`OpenAIRealtimeBetaLLMService`.
|
||||
|
||||
- Added initial support for interruption strategies, which determine if the user
|
||||
should interrupt the bot while the bot is speaking. Interruption strategies
|
||||
can be based on factors such as audio volume or the number of words spoken by
|
||||
the user. These can be specified via the new `interruption_strategies` field
|
||||
in `PipelineParams`. A new `MinWordsInterruptionStrategy` strategy has been
|
||||
introduced which triggers an interruption if the user has spoken a minimum
|
||||
number of words. If no interruption strategies are specified, the normal
|
||||
interruption behavior applies. If multiple strategies are provided, the first
|
||||
one that evaluates to true will trigger the interruption.
|
||||
|
||||
- `BaseInputTransport` now handles `StopFrame`. When a `StopFrame` is received
|
||||
the transport will pause sending frames downstream until a new `StartFrame` is
|
||||
received. This allows the transport to be reused (keeping the same connection)
|
||||
in a different pipeline.
|
||||
|
||||
- Updated AssemblyAI STT service to support their latest streaming
|
||||
speech-to-text model with improved transcription latency and endpointing.
|
||||
|
||||
- You can now access STT service results through the new
|
||||
`TranscriptionFrame.result` and `InterimTranscriptionFrame.result` field. This
|
||||
is useful in case you use some specific settings for the STT and you want to
|
||||
access the STT results.
|
||||
|
||||
- The examples runner is now public from the `pipecat.examples` package. This
|
||||
allows everyone to build their own examples and run them easily.
|
||||
|
||||
- It is now possible to push `OutputDTMFFrame` or `OutputDTMFUrgentFrame` with
|
||||
`DailyTransport`. This will be sent properly if a Daily dial-out connection
|
||||
has been established.
|
||||
|
||||
- Added `OutputDTMFUrgentFrame` to send a DTMF keypress quickly. The previous
|
||||
`OutputDTMFFrame` queues the keypress with the rest of data frames.
|
||||
|
||||
- Added `DTMFAggregator`, which aggregates keypad presses into
|
||||
`TranscriptionFrame`s. Aggregation occurs after a timeout, termination key
|
||||
press, or user interruption. You can specify the prefix of the
|
||||
`TranscriptionFrame`.
|
||||
|
||||
- Added new functions `DailyTransport.start_transcription()` and
|
||||
`DailyTransport.stop_transcription()` to be able to start and stop Daily
|
||||
transcription dynamically (maybe with different settings).
|
||||
|
||||
### Changed
|
||||
|
||||
- Reverted the default model for `GeminiMultimodalLiveLLMService` back to
|
||||
`models/gemini-2.0-flash-live-001`.
|
||||
`gemini-2.5-flash-preview-native-audio-dialog` has inconsistent performance.
|
||||
You can opt in to using this model by setting the `model` arg.
|
||||
|
||||
- Function calls are now cancelled by default if there's an interruption. To
|
||||
disable this behavior you can set `cancel_on_interruption=False` when
|
||||
registering the function call. Since function calls are executed as tasks you
|
||||
can tell if a function call has been cancelled by catching the
|
||||
`asyncio.CancelledError` exception (and don't forget to raise it again!).
|
||||
|
||||
- Updated OpenTelemetry tracing attribute `metrics.ttfb_ms` to `metrics.ttfb`.
|
||||
The attribute reports TTFB in seconds.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `DailyTransport.send_dtmf()` is deprecated, push an `OutputDTMFFrame` or an
|
||||
`OutputDTMFUrgentFrame` instead.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue with `ElevenLabsTTSService` where long responses would
|
||||
continue generating output even after an interruption.
|
||||
|
||||
- Fixed an issue with the `OpenAILLMContext` where non-Roman characters were
|
||||
being incorrectly encoded as Unicode escape sequences. This was a logging
|
||||
issue and did not impact the actual conversation.
|
||||
|
||||
- In `AWSBedrockLLMService`, worked around a possible bug in AWS Bedrock where
|
||||
a `toolConfig` is required if there has been previous tool use in the
|
||||
messages array. This workaround includes a no_op factory function call that is
|
||||
used to satisfy the requirement.
|
||||
|
||||
- Fixed `WebsocketClientTransport` to use `FrameProcessorSetup.task_manager`
|
||||
instead of `StartFrame.task_manager`.
|
||||
|
||||
### Performance
|
||||
|
||||
- Use `uvloop` as the new event loop on Linux and macOS systems.
|
||||
|
||||
## [0.0.68] - 2025-05-28
|
||||
|
||||
### Added
|
||||
|
||||
- Added `GoogleHttpTTSService` which uses Google's HTTP TTS API.
|
||||
|
||||
- Added `TavusTransport`, a new transport implementation compatible with any
|
||||
Pipecat pipeline. When using the `TavusTransport` the Pipecat bot will
|
||||
connect in the same room as the Tavus Avatar and the user.
|
||||
|
||||
- Added `PlivoFrameSerializer` to support Plivo calls. A full running example
|
||||
has also been added to `examples/plivo-chatbot`.
|
||||
|
||||
- Added `UserBotLatencyLogObserver`. This is an observer that logs the latency
|
||||
between when the user stops speaking and when the bot starts speaking. This
|
||||
gives you an initial idea on how quickly the AI services respond.
|
||||
|
||||
- Added `SarvamTTSService`, which implements Sarvam AI's TTS API:
|
||||
https://docs.sarvam.ai/api-reference-docs/text-to-speech/convert.
|
||||
|
||||
- Added `PipelineTask.add_observer()` and `PipelineTask.remove_observer()` to
|
||||
allow managing observers at runtime. This is useful for cases where the task
|
||||
is passed around to other code components that might want to observe the
|
||||
pipeline dynamically.
|
||||
|
||||
- Added `user_id` field to `TranscriptionMessage`. This allows identifying the
|
||||
user in a multi-user scenario. Note that this requires that
|
||||
`TranscriptionFrame` has the `user_id` properly set.
|
||||
|
||||
- Added new `PipelineTask` event handlers `on_pipeline_started`,
|
||||
`on_pipeline_stopped`, `on_pipeline_ended` and `on_pipeline_cancelled`, which
|
||||
correspond to the `StartFrame`, `StopFrame`, `EndFrame` and `CancelFrame`
|
||||
respectively.
|
||||
|
||||
- Added additional languages to `LmntTTSService`. Languages include: `hi`,
|
||||
`id`, `it`, `ja`, `nl`, `pl`, `ru`, `sv`, `th`, `tr`, `uk`, `vi`.
|
||||
|
||||
- Added a `model` parameter to the `LmntTTSService` constructor, allowing
|
||||
switching between LMNT models.
|
||||
|
||||
- Added `MiniMaxHttpTTSService`, which implements MiniMax's T2A API for TTS.
|
||||
Learn more: https://www.minimax.io/platform_overview
|
||||
|
||||
- A new function `FrameProcessor.setup()` has been added to allow setting up
|
||||
frame processors before receiving a `StartFrame`. This is what's happening
|
||||
internally: `FrameProcessor.setup()` is called, `StartFrame` is pushed from
|
||||
the beginning of the pipeline, your regular pipeline operations, `EndFrame`
|
||||
or `CancelFrame` are pushed from the beginning of the pipeline and finally
|
||||
`FrameProcessor.cleanup()` is called.
|
||||
|
||||
- Added support for OpenTelemetry tracing in Pipecat. This initial
|
||||
implementation includes:
|
||||
|
||||
- A `setup_tracing` method where you can specify your OpenTelemetry exporter
|
||||
- Service decorators for STT (`@traced_stt`), LLM (`@traced_llm`), and TTS
|
||||
(`@traced_tts`) which trace the execution and collect properties and
|
||||
metrics (TTFB, token usage, character counts, etc.)
|
||||
- Class decorators that provide execution tracking; these are generic and can
|
||||
be used for service tracking as needed
|
||||
- Spans that help track traces on a per-conversation and per-turn basis:
|
||||
|
||||
```
|
||||
conversation-uuid
|
||||
├── turn-1
|
||||
│ ├── stt_deepgramsttservice
|
||||
│ ├── llm_openaillmservice
|
||||
│ └── tts_cartesiattsservice
|
||||
...
|
||||
└── turn-n
|
||||
└── ...
|
||||
```
|
||||
|
||||
By default, Pipecat has implemented service decorators to trace execution of
|
||||
STT, LLM, and TTS services. You can enable tracing by setting
|
||||
`enable_tracing` to `True` in the PipelineTask.
|
||||
|
||||
- Added `TurnTrackingObserver`, which tracks the start and end of a user/bot
|
||||
turn pair and emits events `on_turn_started` and `on_turn_stopped`
|
||||
corresponding to the start and end of a turn, respectively.
|
||||
|
||||
- Allow passing observers to `run_test()` while running unit tests.
|
||||
|
||||
### Changed
|
||||
|
||||
- Upgraded `daily-python` to 0.19.1.
|
||||
|
||||
- ⚠️ Updated `SmallWebRTCTransport` to align with how other transports handle
|
||||
`on_client_disconnected`. Now, when the connection is closed and no reconnection
|
||||
is attempted, `on_client_disconnected` is called instead of `on_client_close`. The
|
||||
`on_client_close` callback is no longer used, use `on_client_disconnected` instead.
|
||||
|
||||
- Check if `PipelineTask` has already been cancelled.
|
||||
|
||||
- Don't raise an exception if event handler is not registered.
|
||||
|
||||
- Upgraded `deepgram-sdk` to 4.1.0.
|
||||
|
||||
- Updated `GoogleTTSService` to use Google's streaming TTS API. The default
|
||||
voice also updated to `en-US-Chirp3-HD-Charon`.
|
||||
|
||||
- ⚠️ Refactored the `TavusVideoService`, so it acts like a proxy, sending audio
|
||||
to Tavus and receiving both audio and video. This will make
|
||||
`TavusVideoService` usable with any Pipecat pipeline and with any transport.
|
||||
This is a **breaking change**, check the
|
||||
`examples/foundational/21a-tavus-layer-small-webrtc.py` to see how to use it.
|
||||
|
||||
- `DailyTransport` now uses custom microphone audio tracks instead of virtual
|
||||
microphones. Now, multiple Daily transports can be used in the same process.
|
||||
|
||||
- `DailyTransport` now captures audio from individual participants instead of
|
||||
the whole room. This allows identifying audio frames per participant.
|
||||
|
||||
- Updated the default model for `AnthropicLLMService` to
|
||||
`claude-sonnet-4-20250514`.
|
||||
|
||||
- Updated the default model for `GeminiMultimodalLiveLLMService` to
|
||||
`models/gemini-2.5-flash-preview-native-audio-dialog`.
|
||||
|
||||
- `BaseTextFilter` methods `filter()`, `update_settings()`,
|
||||
`handle_interruption()` and `reset_interruption()` are now async.
|
||||
|
||||
- `BaseTextAggregator` methods `aggregate()`, `handle_interruption()` and
|
||||
`reset()` are now async.
|
||||
|
||||
- The API version for `CartesiaTTSService` and `CartesiaHttpTTSService` has
|
||||
been updated. Also, the `cartesia` dependency has been updated to 2.x.
|
||||
|
||||
- `CartesiaTTSService` and `CartesiaHttpTTSService` now support Cartesia's new
|
||||
`speed` parameter which accepts values of `slow`, `normal`, and `fast`.
|
||||
|
||||
- `GeminiMultimodalLiveLLMService` now uses the user transcription and usage
|
||||
metrics provided by Gemini Live.
|
||||
|
||||
- `GoogleLLMService` has been updated to use `google-genai` instead of the
|
||||
deprecated `google-generativeai`.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- In `CartesiaTTSService` and `CartesiaHttpTTSService`, `emotion` has been
|
||||
deprecated by Cartesia. Pipecat is following suit and deprecating `emotion`
|
||||
as well.
|
||||
|
||||
### Removed
|
||||
|
||||
- Since `GeminiMultimodalLiveLLMService` now transcribes its own audio, the
|
||||
`transcribe_user_audio` arg has been removed. Audio is now transcribed
|
||||
automatically.
|
||||
|
||||
- Removed `SileroVAD` frame processor, just use `SileroVADAnalyzer`
|
||||
instead. Also removed, `07a-interruptible-vad.py` example.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed a `DailyTransport` issue that did not allow capturing video frames if
|
||||
framerate was greater than zero.
|
||||
|
||||
- Fixed a `DeepgramSTTService` connection issue when the user provided their own
|
||||
`LiveOptions`.
|
||||
|
||||
- Fixed a `DailyTransport` issue that would cause images needing resize to block
|
||||
the event loop.
|
||||
|
||||
- Fixed an issue with `ElevenLabsTTSService` where changing the model or voice
|
||||
while the service is running wasn't working.
|
||||
|
||||
- Fixed an issue that would cause multiple instances of the same class to behave
|
||||
incorrectly if any of the given constructor arguments defaulted to a mutable
|
||||
value (e.g. lists, dictionaries, objects).
|
||||
|
||||
- Fixed an issue with `CartesiaTTSService` where `TTSTextFrame` messages weren't
|
||||
being emitted when the model was set to `sonic`. This resulted in the
|
||||
assistant context not being updated with assistant messages.
|
||||
|
||||
### Performance
|
||||
|
||||
- `DailyTransport`: process audio, video and events in separate tasks.
|
||||
|
||||
- Don't create event handler tasks if no user event handlers have been
|
||||
registered.
|
||||
|
||||
### Other
|
||||
|
||||
- It is now possible to run all (or most) foundational examples with multiple
|
||||
transports. By default, they run with P2P (Peer-To-Peer) WebRTC so you can try
|
||||
everything locally. You can also run them with Daily or even with a Twilio
|
||||
phone number.
|
||||
|
||||
- Added foundational examples `07y-interruptible-minimax.py` and
|
||||
`07z-interruptible-sarvam.py` to show how to use the `MiniMaxHttpTTSService`
|
||||
and `SarvamTTSService`, respectively.
|
||||
|
||||
- Added an `open-telemetry-tracing` example, showing how to set up tracing. The
|
||||
example also includes Jaeger as an open source OpenTelemetry client to review
|
||||
traces from the example runs.
|
||||
|
||||
- Added foundational example `29-turn-tracking-observer.py` to show how to use
|
||||
the `TurnTrackingObserver`.
|
||||
|
||||
## [0.0.67] - 2025-05-07
|
||||
|
||||
### Added
|
||||
|
||||
- Added `DebugLogObserver` for detailed frame logging with configurable
|
||||
filtering by frame type and endpoint. This observer automatically extracts
|
||||
and formats all frame data fields for debug logging.
|
||||
|
||||
- `UserImageRequestFrame.video_source` field has been added to request an image
|
||||
from the desired video source.
|
||||
|
||||
- Added support for the AWS Nova Sonic speech-to-speech model with the new
|
||||
`AWSNovaSonicLLMService`.
|
||||
See https://docs.aws.amazon.com/nova/latest/userguide/speech.html.
|
||||
Note that it requires Python >= 3.12 and `pip install pipecat-ai[aws-nova-sonic]`.
|
||||
|
||||
- Added new AWS services `AWSBedrockLLMService` and `AWSTranscribeSTTService`.
|
||||
|
||||
- Added `on_active_speaker_changed` event handler to the `DailyTransport` class.
|
||||
|
||||
- Added `enable_ssml_parsing` and `enable_logging` to `InputParams` in
|
||||
`ElevenLabsTTSService`.
|
||||
|
||||
- Added support to `RimeHttpTTSService` for the `arcana` model.
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated `ElevenLabsTTSService` to use the beta websocket API
|
||||
(multi-stream-input). This new API supports context_ids and cancelling those
|
||||
contexts, which greatly improves interruption handling.
|
||||
|
||||
- Observers `on_push_frame()` now take a single argument `FramePushed` instead
|
||||
of multiple arguments.
|
||||
|
||||
- Updated the default voice for `DeepgramTTSService` to `aura-2-helena-en`.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `PollyTTSService` is now deprecated, use `AWSPollyTTSService` instead.
|
||||
|
||||
- Observer `on_push_frame(src, dst, frame, direction, timestamp)` is now
|
||||
deprecated, use `on_push_frame(data: FramePushed)` instead.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed a `DailyTransport` issue that was causing issues when multiple audio or
|
||||
video sources were being captured.
|
||||
|
||||
- Fixed a `UltravoxSTTService` issue that would cause the service to generate
|
||||
all tokens as one word.
|
||||
|
||||
- Fixed a `PipelineTask` issue that would cause tasks to not be cancelled if
|
||||
task was cancelled from outside of Pipecat.
|
||||
|
||||
- Fixed a `TaskManager` issue that was causing dangling tasks to be reported.
|
||||
|
||||
- Fixed an issue that could cause data to be sent to the transports when they
|
||||
were still not ready.
|
||||
|
||||
- Remove custom audio tracks from `DailyTransport` before leaving.
|
||||
|
||||
### Removed
|
||||
|
||||
- Removed `CanonicalMetricsService` as it's no longer maintained.
|
||||
|
||||
## [0.0.66] - 2025-05-02
|
||||
|
||||
### Added
|
||||
|
||||
- Added two new input parameters to `RimeTTSService`: `pause_between_brackets`
|
||||
and `phonemize_between_brackets`.
|
||||
|
||||
- Added support for cross-platform local smart turn detection. You can use
|
||||
`LocalSmartTurnAnalyzer` for on-device inference using Torch.
|
||||
|
||||
- `BaseOutputTransport` now allows multiple destinations if the transport
|
||||
implementation supports it (e.g. Daily's custom tracks). With multiple
|
||||
destinations it is possible to send different audio or video tracks with a
|
||||
single transport simultaneously. To do that, you need to set the new
|
||||
`Frame.transport_destination` field with your desired transport destination
|
||||
(e.g. custom track name), tell the transport you want a new destination with
|
||||
`TransportParams.audio_out_destinations` or
|
||||
`TransportParams.video_out_destinations` and the transport should take care of
|
||||
the rest.
|
||||
|
||||
- Similar to the new `Frame.transport_destination`, there's a new
|
||||
`Frame.transport_source` field which is set by the `BaseInputTransport` if the
|
||||
incoming data comes from a non-default source (e.g. custom tracks).
|
||||
|
||||
- `TTSService` has a new `transport_destination` constructor parameter. This
|
||||
parameter will be used to update the `Frame.transport_destination` field for
|
||||
each generated `TTSAudioRawFrame`. This allows sending multiple bots' audio to
|
||||
multiple destinations in the same pipeline.
|
||||
|
||||
- Added `DailyTransportParams.camera_out_enabled` and
|
||||
`DailyTransportParams.microphone_out_enabled` which allows you to
|
||||
enable/disable the main output camera or microphone tracks. This is useful if
|
||||
you only want to use custom tracks and not send the main tracks. Note that you
|
||||
still need `audio_out_enabled=True` or `video_out_enabled`.
|
||||
|
||||
- Added `DailyTransport.capture_participant_audio()` which allows you to capture
|
||||
an audio source (e.g. "microphone", "screenAudio" or a custom track name) from
|
||||
a remote participant.
|
||||
|
||||
- Added `DailyTransport.update_publishing()` which allows you to update the call
|
||||
video and audio publishing settings (e.g. audio and video quality).
|
||||
|
||||
- Added `RTVIObserverParams` which allows you to configure what RTVI messages
|
||||
are sent to the clients.
|
||||
|
||||
- Added a `context_window_compression` InputParam to
|
||||
`GeminiMultimodalLiveLLMService` which allows you to enable a sliding context
|
||||
window for the session as well as set the token limit of the sliding window.
|
||||
|
||||
- Updated `SmallWebRTCConnection` to support `ice_servers` with credentials.
|
||||
|
||||
- Added `VADUserStartedSpeakingFrame` and `VADUserStoppedSpeakingFrame`,
|
||||
indicating when the VAD detected the user to start and stop speaking. These
|
||||
events are helpful when using smart turn detection, as the user's stop time
|
||||
can differ from when their turn ends (signified by UserStoppedSpeakingFrame).
|
||||
|
||||
- Added `TranslationFrame`, a new frame type that contains a translated
|
||||
transcription.
|
||||
|
||||
- Added `TransportParams.audio_in_passthrough`. If set (the default), incoming
|
||||
audio will be pushed downstream.
|
||||
|
||||
- Added `MCPClient`; a way to connect to MCP servers and use the MCP servers'
|
||||
tools.
|
||||
|
||||
- Added `Mem0 OSS`: along with Mem0 cloud support, the OSS version is now also
|
||||
available.
|
||||
|
||||
### Changed
|
||||
|
||||
- `TransportParams.audio_out_mixer` now supports a string and also a dictionary to
|
||||
provide a mixer per destination. For example:
|
||||
|
||||
```python
|
||||
audio_out_mixer={
|
||||
"track-1": SoundfileMixer(...),
|
||||
"track-2": SoundfileMixer(...),
|
||||
"track-N": SoundfileMixer(...),
|
||||
},
|
||||
```
|
||||
|
||||
- The `STTMuteFilter` now mutes `InterimTranscriptionFrame` and
|
||||
`TranscriptionFrame` which allows the `STTMuteFilter` to be used in
|
||||
conjunction with transports that generate transcripts, e.g. `DailyTransport`.
|
||||
|
||||
- Function calls now receive a single parameter `FunctionCallParams` instead of
|
||||
`(function_name, tool_call_id, args, llm, context, result_callback)` which is
|
||||
now deprecated.
|
||||
|
||||
- Changed the user aggregator timeout for late transcriptions from 1.0s to 0.5s
|
||||
(`LLMUserAggregatorParams.aggregation_timeout`). Sometimes, the STT services
|
||||
might give us more than one transcription which could come after the user
|
||||
stopped speaking. We still want to include these additional transcriptions
|
||||
with the first one because it's part of the user turn. This is what this
|
||||
timeout is helpful with.
|
||||
|
||||
- Short utterances not detected by VAD while the bot is speaking are now
|
||||
ignored. This reduces the amount of bot interruptions significantly providing
|
||||
a more natural conversation experience.
|
||||
|
||||
- Updated `GladiaSTTService` to output a `TranslationFrame` when specifying a
|
||||
`translation` and `translation_config`.
|
||||
|
||||
- STT services now passthrough audio frames by default. This allows you to add
|
||||
audio recording without worrying about what's wrong in your pipeline when it
|
||||
doesn't work the first time.
|
||||
|
||||
- Input transports now always push audio downstream unless disabled with
|
||||
`TransportParams.audio_in_passthrough`. After many Pipecat releases, we
|
||||
realized this is the common use case. There are use cases where the input
|
||||
transport already provides STT and you also don't want recordings, in which
|
||||
case there's no need to push audio to the rest of the pipeline, but this is
|
||||
not a very common case.
|
||||
|
||||
- Added `RivaSegmentedSTTService`, which allows Riva offline/batch models, such
|
||||
as "canary-1b-asr", to be used in Pipecat.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- Function calls with parameters
|
||||
`(function_name, tool_call_id, args, llm, context, result_callback)` are
|
||||
deprecated, use a single `FunctionCallParams` parameter instead.
|
||||
|
||||
- `TransportParams.camera_*` parameters are now deprecated, use
|
||||
`TransportParams.video_*` instead.
|
||||
|
||||
- `TransportParams.vad_enabled` parameter is now deprecated, use
|
||||
`TransportParams.audio_in_enabled` and `TransportParams.vad_analyzer` instead.
|
||||
|
||||
- `TransportParams.vad_audio_passthrough` parameter is now deprecated, use
|
||||
`TransportParams.audio_in_passthrough` instead.
|
||||
|
||||
- `ParakeetSTTService` is now deprecated, use `RivaSTTService` instead, which uses
|
||||
the model "parakeet-ctc-1.1b-asr" by default.
|
||||
|
||||
- `FastPitchTTSService` is now deprecated, use `RivaTTSService` instead, which uses
|
||||
the model "magpie-tts-multilingual" by default.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue with `SimliVideoService` where the bot was continuously outputting
|
||||
audio, which prevents the `BotStoppedSpeakingFrame` from being emitted.
|
||||
|
||||
- Fixed an issue where `OpenAIRealtimeBetaLLMService` would add two assistant
|
||||
messages to the context.
|
||||
|
||||
- Fixed an issue with `GeminiMultimodalLiveLLMService` where the context
|
||||
contained tokens instead of words.
|
||||
|
||||
- Fixed an issue with HTTP Smart Turn handling, where the service returns a 500
|
||||
error. Previously, this would cause an unhandled exception. Now, a 500 error
|
||||
is treated as an incomplete response.
|
||||
|
||||
- Fixed a TTS services issue that could cause assistant output not to be
|
||||
aggregated to the context when also using `TTSSpeakFrame`s.
|
||||
|
||||
- Fixed an issue where the `SmartTurnMetricsData` was reporting 0ms for
|
||||
inference and processing time when using the `FalSmartTurnAnalyzer`.
|
||||
|
||||
### Other
|
||||
|
||||
- Added `examples/daily-custom-tracks` to show how to send and receive Daily
|
||||
custom tracks.
|
||||
|
||||
- Added `examples/daily-multi-translation` to showcase how to send multiple
|
||||
simultaneous translations with the same transport.
|
||||
|
||||
- Added 04 foundational examples for client/server transports. Also, renamed
|
||||
`29-livekit-audio-chat.py` to `04b-transports-livekit.py`.
|
||||
|
||||
- Added foundational example `13c-gladia-translation.py` showing how to use
|
||||
`TranscriptionFrame` and `TranslationFrame`.
|
||||
|
||||
## [0.0.65] - 2025-04-23 "Sant Jordi's release" 🌹📕
|
||||
|
||||
https://en.wikipedia.org/wiki/Saint_George%27s_Day_in_Catalonia
|
||||
|
||||
### Added
|
||||
|
||||
- Added automatic hangup logic to the Telnyx serializer. This feature hangs up
|
||||
the Telnyx call when an `EndFrame` or `CancelFrame` is received. It is
|
||||
enabled by default and is configurable via the `auto_hang_up` `InputParam`.
|
||||
|
||||
- Added a keepalive task to `GladiaSTTService` to prevent the websocket from
|
||||
disconnecting after 30 seconds of no audio input.
|
||||
|
||||
### Changed
|
||||
|
||||
- The `InputParams` for `ElevenLabsTTSService` and `ElevenLabsHttpTTSService`
|
||||
no longer require that `stability` and `similarity_boost` be set. You can
|
||||
individually set each param.
|
||||
|
||||
- In `TwilioFrameSerializer`, `call_sid` is Optional so as to avoid a breaking
|
||||
change. `call_sid` is required to automatically hang up.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue where `TwilioFrameSerializer` would send two hang up commands:
|
||||
one for the `EndFrame` and one for the `CancelFrame`.
|
||||
|
||||
## [0.0.64] - 2025-04-22
|
||||
|
||||
### Added
|
||||
|
||||
- Added automatic hangup logic to the Twilio serializer. This feature hangs up
|
||||
the Twilio call when an `EndFrame` or `CancelFrame` is received. It is
|
||||
enabled by default and is configurable via the `auto_hang_up` `InputParam`.
|
||||
|
||||
- Added `SmartTurnMetricsData`, which contains end-of-turn prediction metrics,
|
||||
to the `MetricsFrame`. Using `MetricsFrame`, you can now retrieve prediction
|
||||
confidence scores and processing time metrics from the smart turn analyzers.
|
||||
@@ -17,9 +643,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
`GoogleSTTService`, `GoogleTTSService`, and `GoogleVertexLLMService`.
|
||||
|
||||
- Added support for Smart Turn Detection via the `turn_analyzer` transport
|
||||
parameter. You can now choose between `SmartTurnAnalyzer()` for remote
|
||||
inference or `LocalCoreMLSmartTurnAnalyzer()` for on-device inference using
|
||||
Core ML.
|
||||
parameter. You can now choose between `HttpSmartTurnAnalyzer()` or
|
||||
`FalSmartTurnAnalyzer()` for remote inference or
|
||||
`LocalCoreMLSmartTurnAnalyzer()` for on-device inference using Core ML.
|
||||
|
||||
- `DeepgramTTSService` accepts `base_url` argument again, allowing you to
|
||||
connect to an on-prem service.
|
||||
@@ -44,6 +670,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
### Changed
|
||||
|
||||
- `GrokLLMService` now uses `grok-3-beta` as its default model.
|
||||
|
||||
- Daily's REST helpers now include an `eject_at_token_exp` param, which ejects
|
||||
the user when their token expires. This new parameter defaults to False.
|
||||
Also, the default value for `enable_prejoin_ui` changed to False and
|
||||
@@ -75,8 +703,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- Fixed an issue in `SmallWebRTCTransport` where an error was thrown if the
|
||||
client did not create a video transceiver.
|
||||
|
||||
- Fixed an issue where LLM input parameters were not working and applied correctly in `GoogleVertexLLMService`, causing
|
||||
unexpected behavior during inference.
|
||||
- Fixed an issue where LLM input parameters were not working and applied
|
||||
correctly in `GoogleVertexLLMService`, causing unexpected behavior during
|
||||
inference.
|
||||
|
||||
### Other
|
||||
|
||||
- Updated the `twilio-chatbot` example to use the auto-hangup feature.
|
||||
|
||||
## [0.0.63] - 2025-04-11
|
||||
|
||||
|
||||
4
MANIFEST.in
Normal file
@@ -0,0 +1,4 @@
|
||||
prune docs
|
||||
prune examples
|
||||
prune scripts
|
||||
prune tests
|
||||
33
README.md
@@ -8,6 +8,8 @@
|
||||
|
||||
**Pipecat** is an open-source Python framework for building real-time voice and multimodal conversational agents. Orchestrate audio and video, AI services, different transports, and conversation pipelines effortlessly—so you can focus on what makes your agent unique.
|
||||
|
||||
> Want to dive right in? [Install Pipecat](https://docs.pipecat.ai/getting-started/installation) then try the [quickstart](https://docs.pipecat.ai/getting-started/quickstart).
|
||||
|
||||
## 🚀 What You Can Build
|
||||
|
||||
- **Voice Assistants** – natural, streaming conversations with AI
|
||||
@@ -49,18 +51,19 @@ You can connect to Pipecat from any platform using our official SDKs:
|
||||
|
||||
## 🧩 Available services
|
||||
|
||||
| Category | Services |
|
||||
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Parakeet (NVIDIA)](https://docs.pipecat.ai/server/services/stt/parakeet), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [FastPitch (NVIDIA)](https://docs.pipecat.ai/server/services/tts/fastpitch), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
|
||||
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) |
|
||||
| Analytics & Metrics | [Canonical AI](https://docs.pipecat.ai/server/services/analytics/canonical), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
||||
| Category | Services |
|
||||
| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), Cartesia, [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Parakeet (NVIDIA)](https://docs.pipecat.ai/server/services/stt/parakeet), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [FastPitch (NVIDIA)](https://docs.pipecat.ai/server/services/tts/fastpitch), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
|
||||
| Serializers | [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx) |
|
||||
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) |
|
||||
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
||||
|
||||
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
|
||||
|
||||
@@ -130,6 +133,12 @@ pip install "pipecat-ai[option,...]"
|
||||
|
||||
### Running tests
|
||||
|
||||
Install the test dependencies:
|
||||
|
||||
```shell
|
||||
pip install -r test-requirements.txt
|
||||
```
|
||||
|
||||
From the root directory, run:
|
||||
|
||||
```shell
|
||||
|
||||
@@ -50,7 +50,6 @@ autodoc_mock_imports = [
|
||||
"pyht.protos",
|
||||
"pyht.protos.api_pb2",
|
||||
"pipecat_ai_playht", # PlayHT wrapper
|
||||
"vllm",
|
||||
"aiortc",
|
||||
"aiortc.mediastreams",
|
||||
"cv2",
|
||||
@@ -76,7 +75,6 @@ autodoc_mock_imports = [
|
||||
"openpipe",
|
||||
"simli",
|
||||
"soundfile",
|
||||
# Existing mocks
|
||||
"pipecat_ai_krisp",
|
||||
"pyaudio",
|
||||
"_tkinter",
|
||||
@@ -87,6 +85,66 @@ autodoc_mock_imports = [
|
||||
"pydantic.Field",
|
||||
"pydantic._internal._model_construction",
|
||||
"pydantic._internal._fields",
|
||||
# Moondream dependencies
|
||||
"torch",
|
||||
"transformers",
|
||||
"intel_extension_for_pytorch",
|
||||
# Ultravox dependencies
|
||||
"huggingface_hub",
|
||||
"vllm",
|
||||
"vllm.engine.arg_utils",
|
||||
"transformers.AutoTokenizer",
|
||||
# Langchain dependencies
|
||||
"langchain_core",
|
||||
"langchain_core.messages",
|
||||
"langchain_core.runnables",
|
||||
"langchain_core.messages.AIMessageChunk",
|
||||
"langchain_core.runnables.Runnable",
|
||||
# LiveKit dependencies
|
||||
"livekit",
|
||||
"livekit.rtc",
|
||||
"livekit_api",
|
||||
"livekit_protocol",
|
||||
"tenacity",
|
||||
"tenacity.retry",
|
||||
"tenacity.stop_after_attempt",
|
||||
"tenacity.wait_exponential",
|
||||
"rtc",
|
||||
"rtc.Room",
|
||||
"rtc.RoomOptions",
|
||||
"rtc.AudioSource",
|
||||
"rtc.LocalAudioTrack",
|
||||
"rtc.TrackPublishOptions",
|
||||
"rtc.TrackSource",
|
||||
"rtc.AudioStream",
|
||||
"rtc.AudioFrameEvent",
|
||||
"rtc.AudioFrame",
|
||||
"rtc.Track",
|
||||
"rtc.TrackKind",
|
||||
"rtc.RemoteParticipant",
|
||||
"rtc.RemoteTrackPublication",
|
||||
"rtc.DataPacket",
|
||||
# Riva dependencies
|
||||
"riva",
|
||||
"riva.client",
|
||||
"riva.client.Auth",
|
||||
"riva.client.ASRService",
|
||||
"riva.client.StreamingRecognitionConfig",
|
||||
"riva.client.RecognitionConfig",
|
||||
"riva.client.AudioEncoding",
|
||||
"riva.client.proto.riva_tts_pb2",
|
||||
"riva.client.SpeechSynthesisService",
|
||||
# Local CoreML Smart Turn dependencies
|
||||
"coremltools",
|
||||
"coremltools.models",
|
||||
"coremltools.models.MLModel",
|
||||
"torch",
|
||||
"torch.nn",
|
||||
"torch.nn.functional",
|
||||
"transformers",
|
||||
"transformers.AutoFeatureExtractor",
|
||||
# Also add specific classes that are imported
|
||||
"AutoFeatureExtractor",
|
||||
]
|
||||
|
||||
# HTML output settings
|
||||
@@ -118,12 +176,25 @@ def verify_modules():
|
||||
},
|
||||
}
|
||||
|
||||
# Skip importing modules that are in autodoc_mock_imports
|
||||
skipped_modules = set(autodoc_mock_imports)
|
||||
|
||||
missing = []
|
||||
for category, modules in required_modules.items():
|
||||
if isinstance(modules, dict):
|
||||
# Handle nested structure
|
||||
for subcategory, submodules in modules.items():
|
||||
for module in submodules:
|
||||
# Check if module is in autodoc_mock_imports
|
||||
if (
|
||||
f"pipecat.{category}.{subcategory}.{module}" in skipped_modules
|
||||
or module in skipped_modules
|
||||
):
|
||||
logger.info(
|
||||
f"Skipping import of mocked module: pipecat.{category}.{subcategory}.{module}"
|
||||
)
|
||||
continue
|
||||
|
||||
try:
|
||||
__import__(f"pipecat.{category}.{subcategory}.{module}")
|
||||
logger.info(
|
||||
@@ -137,6 +208,11 @@ def verify_modules():
|
||||
else:
|
||||
# Handle flat structure
|
||||
for module in modules:
|
||||
# Check if module is in autodoc_mock_imports
|
||||
if f"pipecat.{category}.{module}" in skipped_modules or module in skipped_modules:
|
||||
logger.info(f"Skipping import of mocked module: pipecat.{category}.{module}")
|
||||
continue
|
||||
|
||||
try:
|
||||
__import__(f"pipecat.{category}.{module}")
|
||||
logger.info(f"Successfully imported pipecat.{category}.{module}")
|
||||
|
||||
@@ -10,7 +10,6 @@ pipecat-ai[anthropic]
|
||||
pipecat-ai[assemblyai]
|
||||
pipecat-ai[aws]
|
||||
pipecat-ai[azure]
|
||||
pipecat-ai[canonical]
|
||||
pipecat-ai[cartesia]
|
||||
pipecat-ai[cerebras]
|
||||
pipecat-ai[deepseek]
|
||||
@@ -26,20 +25,23 @@ pipecat-ai[grok]
|
||||
pipecat-ai[groq]
|
||||
# pipecat-ai[krisp] # Mocked
|
||||
pipecat-ai[koala]
|
||||
pipecat-ai[langchain]
|
||||
pipecat-ai[livekit]
|
||||
# pipecat-ai[langchain] # Mocked
|
||||
# pipecat-ai[livekit] # Mocked
|
||||
pipecat-ai[lmnt]
|
||||
pipecat-ai[local]
|
||||
# pipecat-ai[local-smart-turn] # Mocked
|
||||
# pipecat-ai[mem0] # Mocked
|
||||
# pipecat-ai[mlx-whisper] # Mocked
|
||||
pipecat-ai[moondream]
|
||||
# pipecat-ai[moondream] # Mocked
|
||||
pipecat-ai[nim]
|
||||
# pipecat-ai[neuphonic] # Mocked
|
||||
pipecat-ai[noisereduce]
|
||||
pipecat-ai[openai]
|
||||
# pipecat-ai[openpipe]
|
||||
# pipecat-ai[playht] # Mocked due to grpcio conflict with riva
|
||||
pipecat-ai[riva]
|
||||
pipecat-ai[qwen]
|
||||
pipecat-ai[remote-smart-turn]
|
||||
# pipecat-ai[riva] # Mocked
|
||||
pipecat-ai[silero]
|
||||
pipecat-ai[simli]
|
||||
pipecat-ai[soundfile]
|
||||
|
||||
@@ -95,5 +95,16 @@ OPENROUTER_API_KEY=...
|
||||
PIPER_BASE_URL=...
|
||||
|
||||
# Smart turn
|
||||
LOCAL_SMART_TURN_MODEL_PATH=
|
||||
REMOTE_SMART_TURN_URL=
|
||||
LOCAL_SMART_TURN_MODEL_PATH=...
|
||||
FAL_SMART_TURN_API_KEY=...
|
||||
|
||||
# Twilio
|
||||
TWILIO_ACCOUNT_SID=...
|
||||
TWILIO_AUTH_TOKEN=...
|
||||
|
||||
# MiniMax
|
||||
MINIMAX_API_KEY=...
|
||||
MINIMAX_GROUP_ID=...
|
||||
|
||||
# Sarvam AI
|
||||
SARVAM_API_KEY=...
|
||||
@@ -12,7 +12,7 @@
|
||||
"license": "ISC",
|
||||
"description": "",
|
||||
"devDependencies": {
|
||||
"vite": "^6.0.9"
|
||||
"vite": "^6.3.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@daily-co/daily-js": "0.74.0"
|
||||
|
||||
@@ -1,66 +0,0 @@
|
||||
# Chatbot with canonical-metrics
|
||||
|
||||
This project implements a chatbot using a pipeline architecture that integrates audio processing, transcription, and a language model for conversational interactions. The chatbot operates within a daily communication environment, utilizing various services for text-to-speech and language model responses.
|
||||
|
||||
## Features
|
||||
|
||||
- **Audio Input and Output**: Captures microphone input and plays back audio responses.
|
||||
- **Voice Activity Detection**: Utilizes Silero VAD to manage audio input intelligently.
|
||||
- **Text-to-Speech**: Integrates ElevenLabs TTS service to convert text responses into audio.
|
||||
- **Language Model Interaction**: Uses OpenAI's GPT-4 model to generate responses based on user input.
|
||||
- **Transcription Services**: Captures and transcribes participant speech for analytics.
|
||||
- **Metrics Collection**: Sends audio data for analysis via Canonical Metrics Service.
|
||||
|
||||
## Requirements
|
||||
|
||||
- Python 3.10+
|
||||
- `python-dotenv`
|
||||
- Additional libraries from the `pipecat` package.
|
||||
|
||||
## Setup
|
||||
|
||||
1. Clone the repository.
|
||||
2. Install the required packages.
|
||||
3. Set up environment variables for API keys:
|
||||
- `OPENAI_API_KEY`
|
||||
- `ELEVENLABS_API_KEY`
|
||||
- `CANONICAL_API_KEY`
|
||||
- `CANONICAL_API_URL`
|
||||
4. Run the script.
|
||||
|
||||
## Usage
|
||||
|
||||
The chatbot introduces itself and engages in conversations, providing brief and creative responses. Designed for flexibility, it can support multiple languages with appropriate configuration.
|
||||
|
||||
## Events
|
||||
|
||||
- Participants joining or leaving the call are handled dynamically, adjusting the chatbot's behavior accordingly.
|
||||
|
||||
|
||||
ℹ️ The first run might take extra time to get started since the VAD (Voice Activity Detection) model needs to be downloaded.
|
||||
|
||||
## Get started
|
||||
|
||||
```bash
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
|
||||
cp env.example .env # and add your credentials
|
||||
|
||||
```
|
||||
|
||||
## Run the server
|
||||
|
||||
```bash
|
||||
python server.py
|
||||
```
|
||||
|
||||
Then, visit `http://localhost:7860/` in your browser to start a chatbot session.
|
||||
|
||||
## Build and test the Docker image
|
||||
|
||||
```
|
||||
docker build -t chatbot .
|
||||
docker run --env-file .env -p 7860:7860 chatbot
|
||||
```
|
||||
@@ -1,148 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import uuid
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
|
||||
from pipecat.services.canonical.metrics import CanonicalMetricsService
|
||||
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
    """Run the Canonical Metrics chatbot example.

    Obtains a room URL and token from ``configure``, builds a Daily transport,
    an ElevenLabs TTS service, an OpenAI LLM service and a
    ``CanonicalMetricsService``, assembles them into one pipeline and runs
    that pipeline with a ``PipelineRunner``.
    """
    async with aiohttp.ClientSession() as session:
        room_url, token = await configure(session)

        daily_params = DailyParams(
            audio_out_enabled=True,
            audio_in_enabled=True,
            camera_out_enabled=False,
            vad_enabled=True,
            vad_audio_passthrough=True,
            vad_analyzer=SileroVADAnalyzer(),
            transcription_enabled=True,
            #
            # Spanish
            #
            # transcription_settings=DailyTranscriptionSettings(
            #     language="es",
            #     tier="nova",
            #     model="2-general"
            # )
        )
        transport = DailyTransport(room_url, token, "Chatbot", daily_params)

        tts = ElevenLabsTTSService(
            api_key=os.getenv("ELEVENLABS_API_KEY"),
            #
            # English
            #
            voice_id="cgSgspJ2msm6clMCkdW9",
            #
            # Spanish
            #
            # model="eleven_multilingual_v2",
            # voice_id="gD1IexrzCvsXPHUuT0s3",
        )

        llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

        system_message = {
            "role": "system",
            #
            # English
            #
            "content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by introducing yourself. Keep all your responses to 12 words or fewer.",
            #
            # Spanish
            #
            # "content": "Eres Chatbot, un amigable y útil robot. Tu objetivo es demostrar tus capacidades de una manera breve. Tus respuestas se convertiran a audio así que nunca no debes incluir caracteres especiales. Contesta a lo que el usuario pregunte de una manera creativa, útil y breve. Empieza por presentarte a ti mismo.",
        }

        context = OpenAILLMContext([system_message])
        context_aggregator = llm.create_context_aggregator(context)

        # CanonicalMetrics uses AudioBufferProcessor under the hood to buffer
        # the audio. On call completion, CanonicalMetrics will send the audio
        # buffer to Canonical for analysis. Visit https://voice.canonical.chat
        # to learn more.
        audio_buffer_processor = AudioBufferProcessor(num_channels=2)
        canonical = CanonicalMetricsService(
            audio_buffer_processor=audio_buffer_processor,
            aiohttp_session=session,
            api_key=os.getenv("CANONICAL_API_KEY"),
            call_id=str(uuid.uuid4()),
            assistant="pipecat-chatbot",
            assistant_speaks_first=True,
            context=context,
        )

        processors = [
            transport.input(),  # microphone
            context_aggregator.user(),
            llm,
            tts,
            transport.output(),
            canonical,  # uploads audio buffer to Canonical AI for metrics
            audio_buffer_processor,  # captures audio into a buffer
            context_aggregator.assistant(),
        ]
        pipeline = Pipeline(processors)

        task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            # Start recording, capture the participant's transcription and
            # queue the current context frame into the pipeline.
            await audio_buffer_processor.start_recording()
            await transport.capture_participant_transcription(participant["id"])
            await task.queue_frames([context_aggregator.user().get_context_frame()])

        @transport.event_handler("on_participant_left")
        async def on_participant_left(transport, participant, reason):
            print(f"Participant left: {participant}")
            await task.cancel()

        @transport.event_handler("on_call_state_updated")
        async def on_call_state_updated(transport, state):
            if state != "left":
                return
            # Here we don't want to cancel, we just want to finish sending
            # whatever is queued, so we use an EndFrame().
            await task.queue_frame(EndFrame())

        await PipelineRunner().run(task)


if __name__ == "__main__":
    asyncio.run(main())
|
||||
@@ -1,5 +0,0 @@
|
||||
python-dotenv
|
||||
fastapi[all]
|
||||
uvicorn
|
||||
pipecat-ai[daily,openai,silero,elevenlabs,canonical]
|
||||
|
||||
@@ -66,9 +66,7 @@ async def main():
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_in_enabled=True,
|
||||
camera_out_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_audio_passthrough=True,
|
||||
video_out_enabled=False,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
#
|
||||
@@ -130,7 +128,14 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
audio_in_sample_rate=16000,
|
||||
audio_out_sample_rate=16000,
|
||||
allow_interruptions=True,
|
||||
),
|
||||
)
|
||||
|
||||
@audiobuffer.event_handler("on_audio_data")
|
||||
async def on_audio_data(buffer, audio, sample_rate, num_channels):
|
||||
|
||||
@@ -53,4 +53,3 @@ async def configure(aiohttp_session: aiohttp.ClientSession):
|
||||
token = await daily_rest_helper.get_token(url, expiry_time)
|
||||
|
||||
return (url, token)
|
||||
return (url, token)
|
||||
|
||||
39
examples/daily-custom-tracks/README.md
Normal file
@@ -0,0 +1,39 @@
|
||||
# Daily Custom Tracks
|
||||
|
||||
This example shows how to send and receive Daily custom tracks. We will run a simple `daily-python` application to send an audio file with a custom track (named "pipecat") to a room. Then, the Pipecat bot will mirror that custom track into another custom track (named "pipecat-mirror") in the same room.
|
||||
|
||||
## Get started
|
||||
|
||||
```bash
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Run the bot
|
||||
|
||||
Start the bot by giving it a Daily room URL.
|
||||
|
||||
```bash
|
||||
python bot.py -u ROOM_URL
|
||||
```
|
||||
|
||||
The bot will wait for the first participant to join. Then, it will mirror a custom track named "pipecat" into a new custom track named "pipecat-mirror".
|
||||
|
||||
## Run the sender
|
||||
|
||||
Now, run the custom track sender. This is a simple `daily-python` application that opens an audio file and sends it as a custom track to the same Daily room.
|
||||
|
||||
```bash
|
||||
python custom_track_sender.py -u ROOM_URL -i office-ambience-mono-16000.mp3
|
||||
```
|
||||
|
||||
## Open client
|
||||
|
||||
Finally, open the client so you can hear both custom tracks.
|
||||
|
||||
```bash
|
||||
open index.html
|
||||
```
|
||||
|
||||
Once the client is opened, copy the URL of the Daily room and join it. You should be able to select which custom track you want to hear.
|
||||
87
examples/daily-custom-tracks/bot.py
Normal file
@@ -0,0 +1,87 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import Frame, InputAudioRawFrame, OutputAudioRawFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class CustomTrackMirrorProcessor(FrameProcessor):
    """Re-emit sourced input audio as output audio tagged with a destination.

    An ``InputAudioRawFrame`` whose ``transport_source`` is truthy is replaced
    by a new ``OutputAudioRawFrame`` carrying the same audio, sample rate and
    channel count, with ``transport_destination`` set to the configured value.
    Every other frame is pushed on unchanged in its original direction.
    """

    def __init__(self, transport_destination: str, **kwargs):
        """Store the destination to stamp on mirrored frames.

        Args:
            transport_destination: Value assigned to ``transport_destination``
                on each mirrored output frame.
            **kwargs: Forwarded to ``FrameProcessor.__init__``.
        """
        super().__init__(**kwargs)
        self._transport_destination = transport_destination

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Mirror sourced input audio frames; pass everything else through."""
        await super().process_frame(frame, direction)

        should_mirror = isinstance(frame, InputAudioRawFrame) and frame.transport_source
        if not should_mirror:
            await self.push_frame(frame, direction)
            return

        mirrored = OutputAudioRawFrame(
            audio=frame.audio,
            sample_rate=frame.sample_rate,
            num_channels=frame.num_channels,
        )
        mirrored.transport_destination = self._transport_destination
        # Pushed with push_frame's default direction, as in the original.
        await self.push_frame(mirrored)
|
||||
|
||||
|
||||
async def main():
    """Run the custom-track mirror bot.

    Obtains a room URL from ``configure``, joins the room without a token and
    runs a three-stage pipeline: transport input, a
    ``CustomTrackMirrorProcessor`` targeting "pipecat-mirror", transport
    output. When the first participant joins, capture of that participant's
    "pipecat" audio source is requested.
    """
    async with aiohttp.ClientSession() as session:
        room_url, _ = await configure(session)

        daily_params = DailyParams(
            audio_in_enabled=True,
            audio_out_enabled=True,
            microphone_out_enabled=False,  # Disable since we just use custom tracks
            audio_out_destinations=["pipecat-mirror"],
        )
        transport = DailyTransport(room_url, None, "Custom tracks mirror", daily_params)

        processors = [
            transport.input(),  # Transport user input
            CustomTrackMirrorProcessor("pipecat-mirror"),
            transport.output(),  # Transport bot output
        ]
        pipeline = Pipeline(processors)

        pipeline_params = PipelineParams(
            audio_in_sample_rate=16000,
            audio_out_sample_rate=16000,
        )
        task = PipelineTask(pipeline, params=pipeline_params)

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            await transport.capture_participant_audio(participant["id"], audio_source="pipecat")

        await PipelineRunner().run(task)


if __name__ == "__main__":
    asyncio.run(main())
|
||||
74
examples/daily-custom-tracks/custom_track_sender.py
Normal file
@@ -0,0 +1,74 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import time
|
||||
|
||||
from daily import CallClient, CustomAudioSource, Daily
|
||||
from pydub import AudioSegment
|
||||
|
||||
# Command-line interface: the room to join and the audio file to send.
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
parser.add_argument("-u", "--url", type=str, required=True, help="URL of the Daily room to join")
parser.add_argument(
    "-i", "--input", type=str, required=True, help="Input audio file (needs 16000 sample rate)"
)

args, _ = parser.parse_known_args()

# Decode the whole MP3 up front; everything below works on the raw bytes.
audio = AudioSegment.from_mp3(args.input)

raw_bytes = audio.raw_data
sample_rate = audio.frame_rate
channels = audio.channels

print(f"Length: {len(raw_bytes)} bytes")
print(f"Sample rate: {sample_rate}, Channels: {channels}")

# Initialize the Daily context & create call client
Daily.init()

client = CallClient()

# Join the room and indicate we have a custom track named "pipecat".
client.join(
    args.url,
    client_settings={
        "publishing": {
            "camera": False,
            "microphone": False,
            "customAudio": {"pipecat": True},
        },
    },
)

try:
    # Just sleep for a couple of seconds. To do this well we should really use
    # completions.
    time.sleep(2)

    # Create the custom audio source. This is where we will write our audio.
    audio_source = CustomAudioSource(sample_rate, channels)

    # Create an audio track and assign it our audio source.
    client.add_custom_audio_track("pipecat", audio_source)

    # Just sleep for a second. To do this well we should really use completions.
    time.sleep(1)

    # Write one second of audio at a time until the whole file has been sent.
    # The factor 2 is the bytes-per-sample of the original code.
    # NOTE(review): this assumes 16-bit samples — confirm against the input.
    chunk_size = sample_rate * channels * 2
    # Slice by offset instead of repeatedly re-slicing the remainder, which
    # copied the rest of the buffer on every iteration.
    for offset in range(0, len(raw_bytes), chunk_size):
        audio_source.write_frames(raw_bytes[offset : offset + chunk_size])
except KeyboardInterrupt:
    # Ctrl-C just stops sending; cleanup happens in the finally block.
    pass
finally:
    # Always leave the room before releasing the client: on normal completion,
    # on Ctrl-C and on any other error. Previously leave() only ran on Ctrl-C.
    client.leave()

    # Just sleep for a second. To do this well we should really use completions.
    time.sleep(1)

    client.release()
|
||||
173
examples/daily-custom-tracks/index.html
Normal file
@@ -0,0 +1,173 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>daily custom tracks</title>
|
||||
</head>
|
||||
<script crossorigin src="https://unpkg.com/@daily-co/daily-js"></script>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/fomantic-ui/2.8.6/semantic.min.js"></script>
|
||||
<link
|
||||
rel="stylesheet"
|
||||
type="text/css"
|
||||
href="https://cdnjs.cloudflare.com/ajax/libs/fomantic-ui/2.8.6/semantic.min.css"
|
||||
/>
|
||||
<script>
|
||||
/**
 * Enable or disable the button with the given element id.
 * @param {string} buttonId - id of the button element to update.
 * @param {boolean} enable - true to enable the button, false to disable it.
 */
function enableButton(buttonId, enable) {
  document.getElementById(buttonId).disabled = !enable;
}
|
||||
|
||||
/**
 * Enable or disable the "join-button" element.
 * @param {boolean} enable - true to enable the Join button, false to disable it.
 */
function enableJoinButton(enable) {
  const joinButtonId = "join-button";
  enableButton(joinButtonId, enable);
}
|
||||
|
||||
/**
 * Enable or disable the "leave-button" element.
 * @param {boolean} enable - true to enable the Leave button, false to disable it.
 */
function enableLeaveButton(enable) {
  const leaveButtonId = "leave-button";
  enableButton(leaveButtonId, enable);
}
|
||||
|
||||
/**
 * Remove every element matching a CSS selector from the document.
 * @param {string} query - selector passed to document.querySelectorAll.
 */
function destroyPlayers(query) {
  // querySelectorAll always returns a (possibly empty) NodeList, never null,
  // so the former `if (items)` guard was dead code.
  for (const item of document.querySelectorAll(query)) {
    item.remove();
  }
}
|
||||
|
||||
/**
 * Remove the audio players and control buttons tagged with a participant id.
 * @param {string} participantId - value of the data-participant-id attribute.
 */
function destroyParticipantPlayers(participantId) {
  const attribute = `[data-participant-id="${participantId}"]`;
  for (const tag of ["audio", "button"]) {
    destroyPlayers(`${tag}${attribute}`);
  }
}
|
||||
|
||||
/**
 * Unmute a media element, turn on autoplay and attach a track to it.
 * @param {HTMLMediaElement} player - element to configure.
 * @param {MediaStreamTrack|null|undefined} track - track to attach; when
 *   null or undefined no stream is attached.
 * @returns {Promise<void>}
 */
async function startPlayer(player, track) {
  player.muted = false;
  player.autoplay = true;
  if (track == null) {
    return;
  }
  player.srcObject = new MediaStream([track]);
}
|
||||
|
||||
/**
 * Create a paused <audio> player plus a toggle button for one audio track and
 * append both to the "audio-container" element.
 *
 * The button starts green (click to play) and turns red while playing.
 *
 * @param {MediaStreamTrack} track - audio track to play.
 * @param {string} participantId - stored on both elements as
 *   data-participant-id.
 * @returns {Promise<HTMLAudioElement>} the created (paused) player.
 */
async function buildAudioPlayer(track, participantId) {
  const playClass = "ui primary green button";
  const pauseClass = "ui primary red button";

  const audioContainer = document.getElementById("audio-container");
  const player = document.createElement("audio");
  player.dataset.participantId = participantId;

  // Create a new button for controlling audio
  const audioControlButton = document.createElement("button");
  audioControlButton.className = playClass;
  // NOTE(review): `_mediaTag` is not a standard MediaStreamTrack property;
  // presumably it is set by daily-js — confirm before relying on it.
  audioControlButton.innerText = track._mediaTag === "cam-audio" ? "english" : track._mediaTag;
  audioControlButton.dataset.participantId = participantId;
  audioControlButton.onclick = async () => {
    if (!player.paused) {
      player.pause();
      audioControlButton.className = playClass;
      return;
    }
    try {
      // play() returns a promise that rejects when playback is refused; only
      // flip the button once playback actually started.
      await player.play();
      audioControlButton.className = pauseClass;
    } catch (err) {
      console.error("Unable to start audio playback", err);
    }
  };

  audioContainer.appendChild(player);
  audioContainer.appendChild(audioControlButton);

  await startPlayer(player, track);
  // Start paused; playback begins only when the button is clicked.
  player.pause();

  return player;
}
|
||||
|
||||
/**
 * Subscribe to a remote participant's audio and custom tracks (not video).
 * Does nothing beyond logging when the id is "local".
 * @param {string} participantId - participant id passed to updateParticipant.
 */
function subscribeToTracks(participantId) {
  console.log(`subscribing to track`);

  if (participantId !== "local") {
    const setSubscribedTracks = { audio: true, video: false, custom: true };
    callObject.updateParticipant(participantId, { setSubscribedTracks });
  }
}
|
||||
|
||||
/**
 * Page entry point: set the initial button state, create the Daily call
 * object (stored on window.callObject) and register the event handlers that
 * subscribe to remote participants and build/destroy audio players.
 */
function startDaily() {
  enableJoinButton(true);
  enableLeaveButton(false);

  window.callObject = window.DailyIframe.createCallObject({});

  callObject.on("participant-joined", (e) => {
    if (e.participant.local) {
      return;
    }
    console.log("participant-joined", e.participant);
    subscribeToTracks(e.participant.session_id);
  });

  callObject.on("participant-left", (e) => {
    console.log("participant-left", e.participant.session_id);
    destroyParticipantPlayers(e.participant.session_id);
  });

  callObject.on("track-started", async (e) => {
    console.log("track-started", e.track);
    // NOTE(review): daily-js types appear to allow `participant` to be null
    // on track events (participant already gone) — confirm. Without an id the
    // player could not be tagged for later cleanup, so skip it.
    const participantId = e.participant?.session_id;
    if (e.track.kind !== "audio" || participantId == null) {
      return;
    }
    await buildAudioPlayer(e.track, participantId);
  });
}
|
||||
|
||||
/**
 * Join the room whose URL is in the "meeting-url" input, with camera and
 * microphone off and automatic track subscription disabled.
 *
 * The buttons switch to the "joined" state immediately; if the join fails the
 * error is logged and the buttons are restored so the user can retry.
 * @returns {Promise<void>}
 */
async function joinRoom() {
  enableJoinButton(false);
  enableLeaveButton(true);

  const meetingUrl = document.getElementById("meeting-url").value;

  try {
    // Await the join so a failure is handled here instead of surfacing as an
    // unhandled promise rejection.
    await callObject.join({
      url: meetingUrl,
      startVideoOff: true,
      startAudioOff: true,
      subscribeToTracksAutomatically: false,
      receiveSettings: {
        base: { video: { layer: 0 } },
      },
    });
  } catch (err) {
    console.error("Failed to join room", err);
    enableJoinButton(true);
    enableLeaveButton(false);
  }
}
|
||||
|
||||
/**
 * Leave the current call, then remove every audio player and re-enable Join.
 *
 * The Leave button is disabled immediately. Join is only re-enabled (and the
 * players cleared) once leave() has settled, so a new join cannot start while
 * the previous call is still being torn down.
 * @returns {Promise<void>}
 */
async function leaveRoom() {
  enableLeaveButton(false);

  try {
    await callObject.leave();
  } catch (err) {
    // Log instead of leaving an unhandled rejection; the UI is reset below.
    console.error("Failed to leave room cleanly", err);
  } finally {
    const audioContainer = document.getElementById("audio-container");
    audioContainer.replaceChildren();
    enableJoinButton(true);
  }
}
|
||||
</script>
|
||||
|
||||
<body onload="startDaily()">
|
||||
<div class="ui centered page grid" style="margin-top: 30px">
|
||||
<div class="ten wide column">
|
||||
<div class="ui form" style="margin-top: 30px">
|
||||
<div class="field">
|
||||
<label>Meeting URL</label>
|
||||
<input id="meeting-url" value="" />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="ui centered aligned header" style="margin-top: 30px">
|
||||
<button id="join-button" class="ui primary button" onclick="joinRoom()">
|
||||
Join
|
||||
</button>
|
||||
<button id="leave-button" class="ui button" onclick="leaveRoom()">
|
||||
Leave
|
||||
</button>
|
||||
</div>
|
||||
<div id="tile" class="ui container" style="margin-top: 30px">
|
||||
<div id="tile" class="ui center aligned grid">
|
||||
<div id="audio-container"></div><br/>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
BIN
examples/daily-custom-tracks/office-ambience-mono-16000.mp3
Normal file
2
examples/daily-custom-tracks/requirements.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
pydub
|
||||
pipecat-ai[daily]
|
||||
@@ -1,7 +1,12 @@
|
||||
FROM python:3.10-bullseye
|
||||
|
||||
RUN mkdir /app
|
||||
RUN mkdir /app/assets
|
||||
RUN mkdir /app/utils
|
||||
COPY *.py /app/
|
||||
COPY requirements.txt /app/
|
||||
|
||||
|
||||
WORKDIR /app
|
||||
RUN pip3 install -r requirements.txt
|
||||
|
||||
39
examples/daily-multi-translation/README.md
Normal file
@@ -0,0 +1,39 @@
|
||||
# Daily Multi Translation
|
||||
|
||||
This example shows how to use Daily to stream multiple simultaneous translations using a single transport. Daily provides custom tracks and in this example we will simultaneously translate incoming audio in English to Spanish, French and German, each of them being sent to a custom track.
|
||||
|
||||
## Get started
|
||||
|
||||
```bash
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
|
||||
cp env.example .env # and add your credentials
|
||||
|
||||
```
|
||||
|
||||
## Run the server
|
||||
|
||||
```bash
|
||||
python server.py
|
||||
```
|
||||
|
||||
Then, visit `http://localhost:7860/` in your browser. This will open a Daily Prebuilt room where you will speak in English (make sure you are not muted).
|
||||
|
||||
## Open client
|
||||
|
||||
Next, you need to open the client that will listen to the translations.
|
||||
|
||||
```bash
|
||||
open index.html
|
||||
```
|
||||
|
||||
Once the client is opened, copy the URL of the Daily room created above and join it. You should be able to select which translation you want to hear.
|
||||
|
||||
## Build and test the Docker image
|
||||
|
||||
```
|
||||
docker build -t daily-multi-translation .
|
||||
docker run --env-file .env -p 7860:7860 daily-multi-translation
|
||||
```
|
||||
165
examples/daily-multi-translation/bot.py
Normal file
@@ -0,0 +1,165 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.mixers.soundfile_mixer import SoundfileMixer
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.observers.loggers.transcription_log_observer import TranscriptionLogObserver
|
||||
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
BACKGROUND_SOUND_FILE = "office-ambience-mono-16000.mp3"
|
||||
|
||||
|
||||
async def main():
    """Run the multi-translation bot.

    Joins a Daily room, passes incoming audio through Deepgram STT and then
    through three parallel branches (Spanish, French, German). Each branch is
    a user context aggregator, an OpenAI LLM with a translate-only system
    prompt, a Cartesia TTS service created with its own
    ``transport_destination``, and an assistant context aggregator.
    """

    def translation_prompt(language: str):
        """Return the single-message system prompt for one target language."""
        return [
            {
                "role": "system",
                "content": f"You will be provided with a sentence in English, and your task is to only translate it into {language}.",
            },
        ]

    destinations = ["spanish", "french", "german"]

    async with aiohttp.ClientSession() as session:
        room_url, token = await configure(session)

        # One mixer per destination, all using the same background sound file.
        mixers = {
            destination: SoundfileMixer(
                sound_files={"office": BACKGROUND_SOUND_FILE}, default_sound="office"
            )
            for destination in destinations
        }

        transport = DailyTransport(
            room_url,
            token,
            "Multi translation bot",
            DailyParams(
                audio_in_enabled=True,
                audio_out_enabled=True,
                audio_out_mixer=mixers,
                audio_out_destinations=destinations,
                microphone_out_enabled=False,  # Disable since we just use custom tracks
                vad_analyzer=SileroVADAnalyzer(),
            ),
        )

        stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

        cartesia_api_key = os.getenv("CARTESIA_API_KEY")
        tts_spanish = CartesiaTTSService(
            api_key=cartesia_api_key,
            voice_id="cefcb124-080b-4655-b31f-932f3ee743de",
            transport_destination="spanish",
        )
        tts_french = CartesiaTTSService(
            api_key=cartesia_api_key,
            voice_id="8832a0b5-47b2-4751-bb22-6a8e2149303d",
            transport_destination="french",
        )
        tts_german = CartesiaTTSService(
            api_key=cartesia_api_key,
            voice_id="38aabb6a-f52b-4fb0-a3d1-988518f4dc06",
            transport_destination="german",
        )

        messages_spanish = translation_prompt("Spanish")
        messages_french = translation_prompt("French")
        messages_german = translation_prompt("German")

        openai_api_key = os.getenv("OPENAI_API_KEY")
        llm_spanish = OpenAILLMService(api_key=openai_api_key)
        llm_french = OpenAILLMService(api_key=openai_api_key)
        llm_german = OpenAILLMService(api_key=openai_api_key)

        context_aggregator_spanish = llm_spanish.create_context_aggregator(
            OpenAILLMContext(messages_spanish)
        )
        context_aggregator_french = llm_french.create_context_aggregator(
            OpenAILLMContext(messages_french)
        )
        context_aggregator_german = llm_german.create_context_aggregator(
            OpenAILLMContext(messages_german)
        )

        spanish_branch = [
            context_aggregator_spanish.user(),
            llm_spanish,
            tts_spanish,
            context_aggregator_spanish.assistant(),
        ]
        french_branch = [
            context_aggregator_french.user(),
            llm_french,
            tts_french,
            context_aggregator_french.assistant(),
        ]
        german_branch = [
            context_aggregator_german.user(),
            llm_german,
            tts_german,
            context_aggregator_german.assistant(),
        ]

        pipeline = Pipeline(
            [
                transport.input(),  # Transport user input
                stt,
                ParallelPipeline(spanish_branch, french_branch, german_branch),
                transport.output(),  # Transport bot output
            ]
        )

        pipeline_params = PipelineParams(
            audio_in_sample_rate=16000,
            audio_out_sample_rate=16000,
            allow_interruptions=True,
            enable_metrics=True,
            enable_usage_metrics=True,
            report_only_initial_ttfb=True,
        )
        task = PipelineTask(
            pipeline,
            params=pipeline_params,
            observers=[TranscriptionLogObserver()],
        )

        await PipelineRunner().run(task)


if __name__ == "__main__":
    asyncio.run(main())
|
||||
@@ -1,6 +1,5 @@
|
||||
DAILY_SAMPLE_ROOM_URL=https://yourdomain.daily.co/yourroom # (for joining the bot to the same room repeatedly for local dev)
|
||||
DAILY_API_KEY=7df...
|
||||
OPENAI_API_KEY=sk-PL...
|
||||
ELEVENLABS_API_KEY=aeb...
|
||||
CANONICAL_API_KEY=can...
|
||||
CANONICAL_API_URL=
|
||||
DEEPGRAM_API_KEY=efb...
|
||||
CARTESIA_API_KEY=aeb...
|
||||
202
examples/daily-multi-translation/index.html
Normal file
@@ -0,0 +1,202 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>daily multi translation</title>
|
||||
</head>
|
||||
<script crossorigin src="https://unpkg.com/@daily-co/daily-js"></script>
|
||||
<script
|
||||
src="https://code.jquery.com/jquery-3.1.1.min.js"
|
||||
integrity="sha256-hVVnYaiADRTO2PzUGmuLJr8BLUSjGIZsDYGmIJLv2b8="
|
||||
crossorigin="anonymous"
|
||||
></script>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/fomantic-ui/2.8.6/semantic.min.js"></script>
|
||||
<link
|
||||
rel="stylesheet"
|
||||
type="text/css"
|
||||
href="https://cdnjs.cloudflare.com/ajax/libs/fomantic-ui/2.8.6/semantic.min.css"
|
||||
/>
|
||||
<script>
|
||||
/**
 * Enable or disable the button with the given element id.
 * @param {string} buttonId - id of the button element to update.
 * @param {boolean} enable - true to enable the button, false to disable it.
 */
function enableButton(buttonId, enable) {
  document.getElementById(buttonId).disabled = !enable;
}
|
||||
|
||||
/**
 * Enable or disable the "join-button" element.
 * @param {boolean} enable - true to enable the Join button, false to disable it.
 */
function enableJoinButton(enable) {
  const joinButtonId = "join-button";
  enableButton(joinButtonId, enable);
}
|
||||
|
||||
/**
 * Enable or disable the "leave-button" element.
 * @param {boolean} enable - true to enable the Leave button, false to disable it.
 */
function enableLeaveButton(enable) {
  const leaveButtonId = "leave-button";
  enableButton(leaveButtonId, enable);
}
|
||||
|
||||
/**
 * Remove every element matching a CSS selector from the document.
 * @param {string} query - selector passed to document.querySelectorAll.
 */
function destroyPlayers(query) {
  // querySelectorAll always returns a (possibly empty) NodeList, never null,
  // so the former `if (items)` guard was dead code.
  for (const item of document.querySelectorAll(query)) {
    item.remove();
  }
}
|
||||
|
||||
/**
 * Remove the video players, audio players and control buttons tagged with a
 * participant id.
 * @param {string} participantId - value of the data-participant-id attribute.
 */
function destroyParticipantPlayers(participantId) {
  const attribute = `[data-participant-id="${participantId}"]`;
  for (const tag of ["video", "audio", "button"]) {
    destroyPlayers(`${tag}${attribute}`);
  }
}
|
||||
|
||||
/**
 * Unmute a media element, turn on autoplay and attach a track to it.
 * @param {HTMLMediaElement} player - element to configure.
 * @param {MediaStreamTrack|null|undefined} track - track to attach; when
 *   null or undefined no stream is attached.
 * @returns {Promise<void>}
 */
async function startPlayer(player, track) {
  player.muted = false;
  player.autoplay = true;
  if (track == null) {
    return;
  }
  player.srcObject = new MediaStream([track]);
}
|
||||
|
||||
/**
 * Create a <video> player for a track, append it to the "video-container"
 * element and start playback.
 * @param {MediaStreamTrack} track - video track to play.
 * @param {string} participantId - stored on the element as
 *   data-participant-id.
 * @returns {Promise<HTMLVideoElement>} the created player, once play() has
 *   resolved.
 */
async function buildVideoPlayer(track, participantId) {
  const container = document.getElementById("video-container");

  const videoElement = document.createElement("video");
  videoElement.dataset.participantId = participantId;
  container.appendChild(videoElement);

  await startPlayer(videoElement, track);
  await videoElement.play();

  return videoElement;
}
|
||||
|
||||
/**
 * Create an initially paused <audio> element for a participant's audio track
 * together with a button that toggles its playback, and add both to
 * #audio-container.
 *
 * The button is green while the audio is paused and red while it is playing.
 * Its label is "english" for the "cam-audio" track, otherwise the track's tag.
 *
 * @param {MediaStreamTrack} track - Audio track to play.
 * @param {string} participantId - Stored in data-participant-id on both elements.
 * @returns {Promise<HTMLAudioElement>} The created (paused) audio element.
 */
async function buildAudioPlayer(track, participantId) {
  const PAUSED_CLASS = "ui primary green button";
  const PLAYING_CLASS = "ui primary red button";

  const audioContainer = document.getElementById("audio-container");
  const player = document.createElement("audio");
  player.dataset.participantId = participantId;

  // Create a new button for controlling audio
  const audioControlButton = document.createElement("button");
  audioControlButton.className = PAUSED_CLASS;
  // NOTE(review): `_mediaTag` looks like a private field on the track object;
  // presumably it carries the track name per language — confirm.
  audioControlButton.innerText =
    track._mediaTag === "cam-audio" ? "english" : track._mediaTag;
  audioControlButton.dataset.participantId = participantId;
  audioControlButton.onclick = () => {
    if (player.paused) {
      // play() returns a Promise that rejects when playback cannot start;
      // handle it so the failure is reported and the button is reset.
      player.play().catch((error) => {
        console.error("Unable to start audio playback", error);
        audioControlButton.className = PAUSED_CLASS;
      });
      audioControlButton.className = PLAYING_CLASS;
    } else {
      player.pause();
      audioControlButton.className = PAUSED_CLASS;
    }
  };

  audioContainer.appendChild(player);
  audioContainer.appendChild(audioControlButton);

  await startPlayer(player, track);
  // Keep the track silent until the user clicks its button.
  player.pause();

  return player;
}
|
||||
|
||||
// Ask the call object to subscribe to the audio, video and custom tracks of
// a participant. The "local" participant id is ignored.
function subscribeToTracks(participantId) {
  console.log(`subscribing to track`);

  const isLocalParticipant = participantId === "local";
  if (!isLocalParticipant) {
    const everyTrack = { audio: true, video: true, custom: true };
    callObject.updateParticipant(participantId, {
      setSubscribedTracks: everyTrack,
    });
  }
}
|
||||
|
||||
// Page bootstrap: reset the Join/Leave buttons, create the call object
// (exposed as window.callObject) and register its event handlers.
function startDaily() {
  enableJoinButton(true);
  enableLeaveButton(false);

  const call = window.DailyIframe.createCallObject({});
  window.callObject = call;

  // Subscribe to the tracks of every remote participant that joins.
  call.on("participant-joined", (event) => {
    if (event.participant.local) {
      return;
    }
    console.log("participant-joined", event.participant);
    subscribeToTracks(event.participant.session_id);
  });

  // Drop the media elements and button of a participant who left.
  call.on("participant-left", (event) => {
    const sessionId = event.participant.session_id;
    console.log("participant-left", sessionId);
    destroyParticipantPlayers(sessionId);
  });

  // Build a player for each started track, chosen by the track's kind.
  call.on("track-started", async (event) => {
    console.log("track-started", event.track);
    switch (event.track.kind) {
      case "video":
        await buildVideoPlayer(event.track, event.participant.session_id);
        break;
      case "audio":
        await buildAudioPlayer(event.track, event.participant.session_id);
        break;
      default:
        break;
    }
  });
}
|
||||
|
||||
/**
 * Join the meeting whose URL is typed into the #meeting-url input.
 *
 * The local camera and microphone start off and tracks are not subscribed
 * automatically (subscribeToTracks() does that per participant). The
 * Join/Leave buttons are switched before the join starts and switched back if
 * the join fails, so the user can retry.
 *
 * @returns {Promise<void>} Resolves once the join attempt has settled.
 */
async function joinRoom() {
  enableJoinButton(false);
  enableLeaveButton(true);

  const meetingUrl = document.getElementById("meeting-url").value;

  try {
    // Await the join so a failure is handled here rather than surfacing as
    // an unhandled promise rejection.
    await callObject.join({
      url: meetingUrl,
      startVideoOff: true,
      startAudioOff: true,
      subscribeToTracksAutomatically: false,
      receiveSettings: {
        base: { video: { layer: 0 } },
      },
    });
  } catch (error) {
    console.error("Failed to join the meeting", error);
    // Restore the pre-join button state.
    enableJoinButton(true);
    enableLeaveButton(false);
  }
}
|
||||
|
||||
/**
 * Leave the current meeting and remove all media elements from the page.
 *
 * The Join/Leave buttons are switched first. The leave is awaited so that the
 * #video-container and #audio-container are emptied only after it has
 * settled; a failed leave is logged and the containers are still emptied.
 *
 * @returns {Promise<void>} Resolves once the UI has been reset.
 */
async function leaveRoom() {
  enableJoinButton(true);
  enableLeaveButton(false);

  try {
    await callObject.leave();
  } catch (error) {
    console.error("Failed to leave the meeting cleanly", error);
  }

  for (const containerId of ["video-container", "audio-container"]) {
    document.getElementById(containerId).replaceChildren();
  }
}
|
||||
</script>
|
||||
|
||||
<body onload="startDaily()">
|
||||
<div class="ui centered page grid" style="margin-top: 30px">
|
||||
<div class="ten wide column">
|
||||
<div class="ui form" style="margin-top: 30px">
|
||||
<div class="field">
|
||||
<label>Meeting URL</label>
|
||||
<input id="meeting-url" value="" />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="ui centered aligned header" style="margin-top: 30px">
|
||||
<button id="join-button" class="ui primary button" onclick="joinRoom()">
|
||||
Join
|
||||
</button>
|
||||
<button id="leave-button" class="ui button" onclick="leaveRoom()">
|
||||
Leave
|
||||
</button>
|
||||
</div>
|
||||
<div id="tile" class="ui container" style="margin-top: 30px">
|
||||
<div id="tile" class="ui center aligned grid">
|
||||
<div id="audio-container"></div><br/>
|
||||
</div>
|
||||
</div>
|
||||
<div id="tile" class="ui container" style="margin-top: 30px">
|
||||
<div id="tile" class="ui center aligned grid">
|
||||
<div id="video-container" class="ui segment"></div>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
BIN
examples/daily-multi-translation/office-ambience-mono-16000.mp3
Normal file
5
examples/daily-multi-translation/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
aiofiles
|
||||
python-dotenv
|
||||
fastapi[all]
|
||||
uvicorn
|
||||
pipecat-ai[daily,deepgram,openai,silero,cartesia]
|
||||
55
examples/daily-multi-translation/runner.py
Normal file
@@ -0,0 +1,55 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
|
||||
|
||||
|
||||
async def configure(aiohttp_session: aiohttp.ClientSession):
    """Resolve the Daily room URL and API key, then fetch a meeting token.

    The room URL comes from ``-u/--url`` or ``DAILY_SAMPLE_ROOM_URL``; the API
    key comes from ``-k/--apikey`` or ``DAILY_API_KEY``. Unknown command-line
    arguments are ignored.

    Args:
        aiohttp_session: HTTP session handed to ``DailyRESTHelper``.

    Returns:
        A ``(url, token)`` tuple.

    Raises:
        Exception: If no room URL or no API key could be determined.
    """
    cli = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
    cli.add_argument(
        "-u", "--url", type=str, required=False, help="URL of the Daily room to join"
    )
    cli.add_argument(
        "-k",
        "--apikey",
        type=str,
        required=False,
        help="Daily API Key (needed to create an owner token for the room)",
    )
    parsed, _unrecognized = cli.parse_known_args()

    # Command-line values win over the environment.
    room_url = parsed.url or os.getenv("DAILY_SAMPLE_ROOM_URL")
    if not room_url:
        raise Exception(
            "No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
        )

    api_key = parsed.apikey or os.getenv("DAILY_API_KEY")
    if not api_key:
        raise Exception(
            "No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
        )

    rest_helper = DailyRESTHelper(
        daily_api_key=api_key,
        daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
        aiohttp_session=aiohttp_session,
    )

    # Meeting token for the room, expiring one hour (in seconds) from now.
    one_hour: float = 60 * 60
    room_token = await rest_helper.get_token(room_url, one_hour)

    return (room_url, room_token)
|
||||
@@ -41,8 +41,7 @@ async def main(room_url: str, token: str):
|
||||
api_key=daily_api_key,
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=False,
|
||||
vad_enabled=True,
|
||||
video_out_enabled=False,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
),
|
||||
|
||||
3
examples/deployment/modal-example/.gitignore
vendored
@@ -1,3 +1,6 @@
|
||||
# Modal clone
|
||||
modal-examples
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
||||
@@ -1,37 +1,91 @@
|
||||
# Deploying Pipecat to Modal.com
|
||||
|
||||
Barebones deployment example for [modal.com](https://www.modal.com)
|
||||
Deployment example for [modal.com](https://www.modal.com). This example demonstrates how to deploy a FastAPI webapp to Modal with an RTVI compatible `/connect` endpoint that launches a Pipecat pipeline in a separate Modal container and returns a room/token for the client to join. This example also supports providing a parameter to the `/connect` endpoint for specifying which Pipecat pipeline to launch; openai, gemini, or vllm. The vllm pipeline points to a self-hosted OpenAI compatible LLM, using a llama model (neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16), deployed to Modal.
|
||||
|
||||
1. Install dependencies
|
||||

|
||||
|
||||
```bash
|
||||
python -m venv venv
|
||||
source venv/bin/activate # or OS equivalent
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
# Running this Example
|
||||
|
||||
2. Setup .env
|
||||
## Install the Modal CLI
|
||||
|
||||
```bash
|
||||
cp env.example .env
|
||||
```
|
||||
Set up a Modal account and install the Modal CLI on your machine, if you have not already, by following the three steps in their [Getting Started Guide](https://modal.com/docs/guide#getting-started).
|
||||
|
||||
Alternatively, you can configure your Modal app to use [secrets](https://modal.com/docs/guide/secrets)
|
||||
## Deploy a self-serve LLM
|
||||
|
||||
3. Test the app locally
|
||||
1. Deploy Modal's OpenAI-compatible LLM service:
|
||||
|
||||
```bash
|
||||
modal serve app.py
|
||||
```
|
||||
```bash
|
||||
git clone https://github.com/modal-labs/modal-examples
|
||||
cd modal-examples
|
||||
modal deploy 06_gpu_and_ml/llm-serving/vllm_inference.py
|
||||
```
|
||||
|
||||
Refer to Modal's guide and example for [Deploying an OpenAI-compatible LLM service with vLLM](https://modal.com/docs/examples/vllm_inference) for more details.
|
||||
|
||||
2. Take note of the endpoint URL from the previous step, which will look like:
|
||||
```
|
||||
https://{your-workspace}--example-vllm-openai-compatible-serve.modal.run
|
||||
```
|
||||
You'll need this for the `bot_vllm.py` file in the next section.
|
||||
|
||||
**Note:** The default Modal LLM example uses Llama-3.1 and will shut down after 15 minutes of inactivity. Cold starts take 5-10 minutes. To prepare the service, we recommend visiting the `/docs` endpoint (`https://<Modal workspace>--example-vllm-openai-compatible-serve.modal.run/docs`) for your deployed LLM and wait for it to fully load before connecting your client.
|
||||
|
||||
## Deploy FastAPI App and Pipecat pipeline to Modal
|
||||
|
||||
1. Setup environment variables
|
||||
|
||||
```bash
|
||||
cd server
|
||||
cp env.example .env
|
||||
# Modify .env to provide your service API Keys
|
||||
```
|
||||
|
||||
Alternatively, you can configure your Modal app to use [secrets](https://modal.com/docs/guide/secrets)
|
||||
|
||||
2. Update the `modal_url` in `server/src/bot_vllm.py` to point to the url produced from the self-serve llm deploy, mentioned above.
|
||||
|
||||
3. From within the `server` directory, test the app locally:
|
||||
|
||||
```bash
|
||||
modal serve app.py
|
||||
```
|
||||
|
||||
4. Deploy to production
|
||||
|
||||
```bash
|
||||
modal deploy app.py
|
||||
```
|
||||
```bash
|
||||
modal deploy app.py
|
||||
```
|
||||
|
||||
## Configuration options
|
||||
5. Note the endpoint URL produced from this deployment. It will look like:
|
||||
|
||||
This app sets some sensible defaults for reducing cold starts, such as `keep_warm=1`, which will keep at least 1 warm instance ready for your bot function.
|
||||
```bash
|
||||
https://{your-workspace}--pipecat-modal-fastapi-app.modal.run
|
||||
```
|
||||
|
||||
It has been configured to only allow a concurrency of 1 (`max_inputs=1`) as each user will require their own running function.
|
||||
You'll need this URL for the client's `app.js` configuration mentioned in its README.
|
||||
|
||||
## Launch your bots on Modal
|
||||
|
||||
### Option 1: Direct Link
|
||||
|
||||
Simply click on the url displayed after running the server or deploy step to launch an agent and be redirected to a Daily room to talk with the launched bot. This will use the OpenAI pipeline.
|
||||
|
||||
### Option 2: Connect via an RTVI Client
|
||||
|
||||
Follow the instructions provided in the [client folder's README](client/javascript/README.md) for building and running a custom client that connects to your Modal endpoint. The provided client provides a dropdown for choosing which bot pipeline to run.
|
||||
|
||||
# Navigating your llm, server, and Pipecat logs
|
||||
|
||||
In your [Modal dashboard](https://modal.com/apps), you should have two Apps listed under Live Apps:
|
||||
|
||||
1. `example-vllm-openai-compatible`: This App contains the containers and logs used to run your self-hosted LLM. There will be just one App Function listed: `serve`. Click on this function to view logs for your LLM.
|
||||
2. `pipecat-modal`: This App contains the containers and logs used to run your `connect` endpoints and Pipecat pipelines. It will list two App Functions:
|
||||
1. `fastapi_app`: This function is running the endpoints that your client will interact with and initiate starting a new pipeline (`/`, `/connect`, `/status`). Click on this function to see logs for each endpoint hit.
|
||||
2. `bot_runner`: This function handles launching and running a bot pipeline. Click on this function to get a list of all pipeline runs and access each run's logs.
|
||||
|
||||
# Modal + Pipecat Tips
|
||||
|
||||
- In most other Pipecat examples, we use `Popen` to launch the pipeline process from the `/connect` endpoint. In this example, we use a Modal function instead. This allows us to run the pipelines using a separately defined Modal image as well as run each pipeline in an isolated container.
|
||||
- For the FastAPI and most common Pipecat Pipeline containers, a default `debian_slim` CPU-only should be all that's required to run. GPU containers are needed for self-hosted services.
|
||||
- To minimize cold starts of the pipeline and reduce latency for users, set `min_containers=1` on the Modal Function that launches the pipeline to ensure at least one warm instance of your function is always available.
|
||||
- For next steps on running a self-hosted llm and reducing latency, check out all of [Modal's LLM examples](https://modal.com/docs/examples/vllm_inference).
|
||||
|
||||
@@ -1,80 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
import modal
|
||||
from bot import _voice_bot_process
|
||||
from fastapi import HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from loguru import logger
|
||||
|
||||
MAX_SESSION_TIME = 15 * 60 # 15 minutes
|
||||
|
||||
app = modal.App("pipecat-modal")
|
||||
|
||||
|
||||
image = modal.Image.debian_slim(python_version="3.12").pip_install_from_requirements(
|
||||
"requirements.txt"
|
||||
)
|
||||
|
||||
|
||||
@app.function(
    image=image,
    cpu=1.0,
    secrets=[modal.Secret.from_dotenv()],
    keep_warm=1,
    enable_memory_snapshot=True,
    max_inputs=1,  # Do not reuse instances across requests
    retries=0,
)
def launch_bot_process(room_url: str, token: str):
    """Modal function that runs the voice bot for a single room.

    Args:
        room_url: URL of the Daily room the bot should join.
        token: Meeting token the bot uses to join the room.
    """
    _voice_bot_process(room_url, token)
|
||||
|
||||
|
||||
@app.function(
    image=image,
    secrets=[modal.Secret.from_dotenv()],
)
@modal.web_endpoint(method="POST")
async def start():
    """POST endpoint: create a Daily room, spawn a bot for it, return credentials.

    Returns:
        JSONResponse with the keys ``"room_url"`` and ``"token"``.

    Raises:
        HTTPException: 500 if the room or its token could not be created.
    """
    from pipecat.transports.services.helpers.daily_rest import (
        DailyRESTHelper,
        DailyRoomParams,
    )

    logger.info("Request received")

    async with aiohttp.ClientSession() as session:
        daily_rest_helper = DailyRESTHelper(
            daily_api_key=os.getenv("DAILY_API_KEY", ""),
            daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
            aiohttp_session=session,
        )

        # Create new Daily room
        room = await daily_rest_helper.create_room(DailyRoomParams())
        if not room.url:
            raise HTTPException(
                status_code=500,
                detail="Unable to create room",
            )
        logger.info(f"Created room: {room.url}")

        # Create bot token for room
        token = await daily_rest_helper.get_token(room.url, MAX_SESSION_TIME)
        if not token:
            raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")

        # NOTE(review): this writes the meeting token to the logs; consider redacting it.
        logger.info(f"Bot token created: {token}")

        # Spawn a new bot process
        launch_bot_process.spawn(room_url=room.url, token=token)

        # Return the room URL and token to the user to join.
        # The key must be the literal string "token"; using the variable as
        # the key would make the secret token value itself the JSON key.
        return JSONResponse(content={"room_url": room.url, "token": token})
|
||||
@@ -1,95 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token: str):
    """Build and run the voice bot pipeline for one Daily room.

    The pipeline chains: transport input -> user context aggregator -> LLM ->
    TTS -> transport output -> assistant context aggregator.

    Args:
        room_url: URL of the Daily room to join.
        token: Meeting token used to join the room.
    """
    daily_params = DailyParams(
        audio_out_enabled=True,
        transcription_enabled=True,
        vad_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
    )
    transport = DailyTransport(room_url, token, "bot", daily_params)

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY", ""), voice_id="71a7ad14-091c-4e8e-a314-022ece01c121"
    )

    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

    system_prompt = {
        "role": "system",
        "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
    }
    # This list is handed to the context and appended to by the join handler below.
    messages = [system_prompt]

    context = OpenAILLMContext(messages)
    context_aggregator = llm.create_context_aggregator(context)

    stages = [
        transport.input(),
        context_aggregator.user(),
        llm,
        tts,
        transport.output(),
        context_aggregator.assistant(),
    ]
    pipeline = Pipeline(stages)

    task_params = PipelineParams(
        allow_interruptions=True,
        enable_metrics=True,
        enable_usage_metrics=True,
        report_only_initial_ttfb=True,
    )
    task = PipelineTask(pipeline, params=task_params)

    @transport.event_handler("on_first_participant_joined")
    async def on_first_participant_joined(transport, participant):
        # Start transcribing the participant, then prompt the bot to introduce itself.
        await transport.capture_participant_transcription(participant["id"])
        intro_request = {"role": "system", "content": "Please introduce yourself to the user."}
        messages.append(intro_request)
        context_frame = context_aggregator.user().get_context_frame()
        await task.queue_frames([context_frame])

    @transport.event_handler("on_participant_left")
    async def on_participant_left(transport, participant, reason):
        # Cancel the pipeline task when a participant leaves.
        await task.cancel()

    runner = PipelineRunner()
    await runner.run(task)
|
||||
|
||||
|
||||
def _voice_bot_process(room_url: str, token: str):
    """Synchronous wrapper that runs ``main`` to completion with ``asyncio.run``."""
    bot_coroutine = main(room_url, token)
    asyncio.run(bot_coroutine)
|
||||
1
examples/deployment/modal-example/client/javascript/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
node_modules
|
||||
@@ -0,0 +1,29 @@
|
||||
# JavaScript Implementation
|
||||
|
||||
Basic implementation using the [Pipecat JavaScript SDK](https://docs.pipecat.ai/client/js/introduction).
|
||||
|
||||
## Setup
|
||||
|
||||
1. Deploy the Modal server. See the main [README](../../README.md).
|
||||
|
||||
2. Navigate to the `client/javascript` directory:
|
||||
|
||||
```bash
|
||||
cd client/javascript
|
||||
```
|
||||
|
||||
3. Modify the baseUrl in src/app.js to point to your deployed Modal endpoint
|
||||
|
||||
4. Install dependencies:
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
5. Run the client app:
|
||||
|
||||
```
|
||||
npm run dev
|
||||
```
|
||||
|
||||
6. Visit http://localhost:5173 in your browser.
|
||||
@@ -0,0 +1,49 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>AI Chatbot</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="status-bar">
|
||||
<div class="status">
|
||||
Status: <span id="connection-status">Disconnected</span>
|
||||
</div>
|
||||
<div class="controls">
|
||||
<select id="bot-selector">
|
||||
<option value="openai">OpenAI</option>
|
||||
<option value="gemini">Gemini</option>
|
||||
<option value="vllm">Llama</option>
|
||||
</select>
|
||||
<button id="connect-btn">Connect</button>
|
||||
<button id="disconnect-btn" disabled>Disconnect</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="main-content">
|
||||
<div class="bot-container">
|
||||
<div id="bot-video-container"></div>
|
||||
<audio id="bot-audio" autoplay></audio>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="device-bar">
|
||||
<div class="device-controls">
|
||||
<select id="device-selector"></select>
|
||||
<button id="mic-toggle-btn">Mute Mic</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="debug-panel">
|
||||
<h3>Debug Info</h3>
|
||||
<div id="debug-log"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script type="module" src="/src/app.js"></script>
|
||||
<link rel="stylesheet" href="/src/style.css" />
|
||||
</body>
|
||||
</html>
|
||||
1191
examples/deployment/modal-example/client/javascript/package-lock.json
generated
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"name": "client",
|
||||
"version": "1.0.0",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
"build": "vite build",
|
||||
"preview": "vite preview"
|
||||
},
|
||||
"keywords": [],
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"description": "",
|
||||
"devDependencies": {
|
||||
"vite": "^6.3.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^0.3.5",
|
||||
"@pipecat-ai/daily-transport": "^0.3.10"
|
||||
}
|
||||
}
|
||||
381
examples/deployment/modal-example/client/javascript/src/app.js
Normal file
@@ -0,0 +1,381 @@
|
||||
/**
|
||||
* Copyright (c) 2024–2025, Daily
|
||||
*
|
||||
* SPDX-License-Identifier: BSD 2-Clause License
|
||||
*/
|
||||
|
||||
/**
|
||||
* RTVI Client Implementation
|
||||
*
|
||||
* This client connects to an RTVI-compatible bot server using WebRTC (via Daily).
|
||||
* It handles audio/video streaming and manages the connection lifecycle.
|
||||
*
|
||||
* Requirements:
|
||||
* - A running RTVI bot server (set `baseUrl` below to your deployed Modal endpoint)
|
||||
* - The server must implement the /connect endpoint that returns Daily.co room credentials
|
||||
* - Browser with WebRTC support
|
||||
*/
|
||||
|
||||
import { RTVIClient, RTVIEvent } from '@pipecat-ai/client-js';
|
||||
import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
|
||||
/**
|
||||
* ChatbotClient handles the connection and media management for a real-time
|
||||
* voice and video interaction with an AI bot.
|
||||
*/
|
||||
class ChatbotClient {
|
||||
constructor() {
|
||||
// Initialize client state
|
||||
this.rtviClient = null;
|
||||
this.setupDOMElements();
|
||||
this.initializeClientAndTransport();
|
||||
this.setupEventListeners();
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up references to DOM elements and create necessary media elements
|
||||
*/
|
||||
setupDOMElements() {
|
||||
// Get references to UI control elements
|
||||
this.connectBtn = document.getElementById('connect-btn');
|
||||
this.disconnectBtn = document.getElementById('disconnect-btn');
|
||||
this.statusSpan = document.getElementById('connection-status');
|
||||
this.debugLog = document.getElementById('debug-log');
|
||||
this.botVideoContainer = document.getElementById('bot-video-container');
|
||||
this.deviceSelector = document.getElementById('device-selector');
|
||||
|
||||
// Create an audio element for bot's voice output
|
||||
this.botAudio = document.createElement('audio');
|
||||
this.botAudio.autoplay = true;
|
||||
this.botAudio.playsInline = true;
|
||||
document.body.appendChild(this.botAudio);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up event listeners for connect/disconnect buttons
|
||||
*/
|
||||
setupEventListeners() {
|
||||
this.connectBtn.addEventListener('click', () => this.connect());
|
||||
this.disconnectBtn.addEventListener('click', () => this.disconnect());
|
||||
|
||||
// Populate device selector
|
||||
this.rtviClient.getAllMics().then((mics) => {
|
||||
console.log('Available mics:', mics);
|
||||
mics.forEach((device) => {
|
||||
const option = document.createElement('option');
|
||||
option.value = device.deviceId;
|
||||
option.textContent = device.label || `Microphone ${device.deviceId}`;
|
||||
this.deviceSelector.appendChild(option);
|
||||
});
|
||||
});
|
||||
this.deviceSelector.addEventListener('change', (event) => {
|
||||
const selectedDeviceId = event.target.value;
|
||||
console.log('Selected device ID:', selectedDeviceId);
|
||||
this.rtviClient.updateMic(selectedDeviceId);
|
||||
});
|
||||
|
||||
// Handle mic mute/unmute toggle
|
||||
const micToggleBtn = document.getElementById('mic-toggle-btn');
|
||||
|
||||
micToggleBtn.addEventListener('click', () => {
|
||||
let micEnabled = this.rtviClient.isMicEnabled;
|
||||
micToggleBtn.textContent = micEnabled ? 'Unmute Mic' : 'Mute Mic';
|
||||
this.rtviClient.enableMic(!micEnabled);
|
||||
// Add logic to mute/unmute the mic
|
||||
if (micEnabled) {
|
||||
console.log('Mic muted');
|
||||
// Add code to mute the mic
|
||||
} else {
|
||||
console.log('Mic unmuted');
|
||||
// Add code to unmute the mic
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up the RTVI client and Daily transport
|
||||
*/
|
||||
async initializeClientAndTransport() {
|
||||
// Initialize the RTVI client with a DailyTransport and our configuration
|
||||
this.rtviClient = new RTVIClient({
|
||||
transport: new DailyTransport(),
|
||||
params: {
|
||||
// REPLACE WITH YOUR MODAL URL ENDPOINT
|
||||
baseUrl:
|
||||
'https://<Modal workspace>--pipecat-modal-bot-launcher.modal.run',
|
||||
endpoints: {
|
||||
connect: '/connect',
|
||||
},
|
||||
requestData: {
|
||||
bot_name: 'openai',
|
||||
},
|
||||
},
|
||||
enableMic: true, // Enable microphone for user input
|
||||
enableCam: false,
|
||||
callbacks: {
|
||||
// Handle connection state changes
|
||||
onConnected: () => {
|
||||
this.updateStatus('Connected');
|
||||
this.connectBtn.disabled = true;
|
||||
this.disconnectBtn.disabled = false;
|
||||
this.log('Client connected');
|
||||
},
|
||||
onDisconnected: () => {
|
||||
this.updateStatus('Disconnected');
|
||||
this.connectBtn.disabled = false;
|
||||
this.disconnectBtn.disabled = true;
|
||||
this.log('Client disconnected');
|
||||
},
|
||||
// Handle transport state changes
|
||||
onTransportStateChanged: (state) => {
|
||||
this.updateStatus(`Transport: ${state}`);
|
||||
this.log(`Transport state changed: ${state}`);
|
||||
if (state === 'connecting') {
|
||||
window.startTime = Date.now();
|
||||
}
|
||||
if (state === 'ready') {
|
||||
this.setupMediaTracks();
|
||||
console.warn('TIME TO BOT READY:', Date.now() - window.startTime);
|
||||
}
|
||||
},
|
||||
// Handle bot connection events
|
||||
onBotConnected: (participant) => {
|
||||
this.log(`Bot connected: ${JSON.stringify(participant)}`);
|
||||
},
|
||||
onBotDisconnected: (participant) => {
|
||||
this.log(`Bot disconnected: ${JSON.stringify(participant)}`);
|
||||
},
|
||||
onBotReady: (data) => {
|
||||
this.log(`Bot ready: ${JSON.stringify(data)}`);
|
||||
this.setupMediaTracks();
|
||||
},
|
||||
// Transcript events
|
||||
onUserTranscript: (data) => {
|
||||
// Only log final transcripts
|
||||
if (data.final) {
|
||||
this.log(`User: ${data.text}`);
|
||||
}
|
||||
},
|
||||
onBotTranscript: (data) => {
|
||||
this.log(`Bot: ${data.text}`);
|
||||
},
|
||||
// Error handling
|
||||
onMessageError: (error) => {
|
||||
console.log('Message error:', error);
|
||||
},
|
||||
onMicUpdated: (data) => {
|
||||
console.log('Mic updated:', data);
|
||||
this.deviceSelector.value = data.deviceId;
|
||||
},
|
||||
onError: (error) => {
|
||||
console.log('Error:', JSON.stringify(error));
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
// Set up listeners for media track events
|
||||
this.setupTrackListeners();
|
||||
|
||||
await this.rtviClient.initDevices();
|
||||
window.client = this.rtviClient;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a timestamped message to the debug log
|
||||
*/
|
||||
log(message) {
|
||||
const entry = document.createElement('div');
|
||||
entry.textContent = `${new Date().toISOString()} - ${message}`;
|
||||
|
||||
// Add styling based on message type
|
||||
if (message.startsWith('User: ')) {
|
||||
entry.style.color = '#2196F3'; // blue for user
|
||||
} else if (message.startsWith('Bot: ')) {
|
||||
entry.style.color = '#4CAF50'; // green for bot
|
||||
}
|
||||
|
||||
this.debugLog.appendChild(entry);
|
||||
this.debugLog.scrollTop = this.debugLog.scrollHeight;
|
||||
console.log(message);
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the connection status display
|
||||
*/
|
||||
updateStatus(status) {
|
||||
this.statusSpan.textContent = status;
|
||||
this.log(`Status: ${status}`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check for available media tracks and set them up if present
|
||||
* This is called when the bot is ready or when the transport state changes to ready
|
||||
*/
|
||||
setupMediaTracks() {
|
||||
if (!this.rtviClient) return;
|
||||
|
||||
// Get current tracks from the client
|
||||
const tracks = this.rtviClient.tracks();
|
||||
|
||||
// Set up any available bot tracks
|
||||
if (tracks.bot?.audio) {
|
||||
this.setupAudioTrack(tracks.bot.audio);
|
||||
}
|
||||
if (tracks.bot?.video) {
|
||||
this.setupVideoTrack(tracks.bot.video);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up listeners for track events (start/stop)
|
||||
* This handles new tracks being added during the session
|
||||
*/
|
||||
setupTrackListeners() {
|
||||
if (!this.rtviClient) return;
|
||||
|
||||
// Listen for new tracks starting
|
||||
this.rtviClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
// Only handle non-local (bot) tracks
|
||||
if (!participant?.local) {
|
||||
if (track.kind === 'audio') {
|
||||
this.setupAudioTrack(track);
|
||||
} else if (track.kind === 'video') {
|
||||
this.setupVideoTrack(track);
|
||||
}
|
||||
this.log(
|
||||
`Track started event: ${track.kind} from ${
|
||||
participant?.name || 'unknown'
|
||||
}`
|
||||
);
|
||||
} else {
|
||||
this.log('Local mic unmuted');
|
||||
}
|
||||
});
|
||||
|
||||
// Listen for tracks stopping
|
||||
this.rtviClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
if (participant.local) {
|
||||
this.log('Local mic muted');
|
||||
return;
|
||||
}
|
||||
this.log(
|
||||
`Track stopped event: ${track.kind} from ${
|
||||
participant?.name || 'unknown'
|
||||
}`
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up an audio track for playback
|
||||
* Handles both initial setup and track updates
|
||||
*/
|
||||
setupAudioTrack(track) {
|
||||
this.log('Setting up audio track');
|
||||
// Check if we're already playing this track
|
||||
if (this.botAudio.srcObject) {
|
||||
const oldTrack = this.botAudio.srcObject.getAudioTracks()[0];
|
||||
if (oldTrack?.id === track.id) return;
|
||||
}
|
||||
// Create a new MediaStream with the track and set it as the audio source
|
||||
this.botAudio.srcObject = new MediaStream([track]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up a video track for display
|
||||
* Handles both initial setup and track updates
|
||||
*/
|
||||
setupVideoTrack(track) {
|
||||
this.log('Setting up video track');
|
||||
const videoEl = document.createElement('video');
|
||||
videoEl.autoplay = true;
|
||||
videoEl.playsInline = true;
|
||||
videoEl.muted = true;
|
||||
videoEl.style.width = '100%';
|
||||
videoEl.style.height = '100%';
|
||||
videoEl.style.objectFit = 'cover';
|
||||
|
||||
// Check if we're already displaying this track
|
||||
if (this.botVideoContainer.querySelector('video')?.srcObject) {
|
||||
const oldTrack = this.botVideoContainer
|
||||
.querySelector('video')
|
||||
.srcObject.getVideoTracks()[0];
|
||||
if (oldTrack?.id === track.id) return;
|
||||
}
|
||||
|
||||
// Create a new MediaStream with the track and set it as the video source
|
||||
videoEl.srcObject = new MediaStream([track]);
|
||||
this.botVideoContainer.innerHTML = '';
|
||||
this.botVideoContainer.appendChild(videoEl);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize and connect to the bot
|
||||
* This sets up the RTVI client, initializes devices, and establishes the connection
|
||||
*/
|
||||
async connect() {
|
||||
try {
|
||||
const botSelector = document.getElementById('bot-selector');
|
||||
const selectedBot = botSelector.value;
|
||||
this.rtviClient.params.requestData.bot_name = selectedBot;
|
||||
|
||||
// Initialize audio/video devices
|
||||
this.log('Initializing devices...');
|
||||
await this.rtviClient.initDevices();
|
||||
|
||||
// Connect to the bot
|
||||
this.log(`Connecting to bot: ${selectedBot}`);
|
||||
await this.rtviClient.connect();
|
||||
|
||||
this.log('Connection complete');
|
||||
} catch (error) {
|
||||
// Handle any errors during connection
|
||||
console.error('Connection error:', error);
|
||||
this.log(`Error connecting: ${JSON.stringify(error.message)}`);
|
||||
this.log(`Error stack: ${error.stack}`);
|
||||
this.updateStatus('Error');
|
||||
|
||||
// Clean up if there's an error
|
||||
if (this.rtviClient) {
|
||||
try {
|
||||
await this.rtviClient.disconnect();
|
||||
} catch (disconnectError) {
|
||||
this.log(`Error during disconnect: ${disconnectError.message}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Disconnect from the bot and clean up media resources
|
||||
*/
|
||||
async disconnect() {
|
||||
if (this.rtviClient) {
|
||||
try {
|
||||
// Disconnect the RTVI client
|
||||
await this.rtviClient.disconnect();
|
||||
|
||||
// Clean up audio
|
||||
if (this.botAudio.srcObject) {
|
||||
this.botAudio.srcObject.getTracks().forEach((track) => track.stop());
|
||||
this.botAudio.srcObject = null;
|
||||
}
|
||||
|
||||
// Clean up video
|
||||
if (this.botVideoContainer.querySelector('video')?.srcObject) {
|
||||
const video = this.botVideoContainer.querySelector('video');
|
||||
video.srcObject.getTracks().forEach((track) => track.stop());
|
||||
video.srcObject = null;
|
||||
}
|
||||
this.botVideoContainer.innerHTML = '';
|
||||
} catch (error) {
|
||||
this.log(`Error disconnecting: ${error.message}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize the client when the page loads
|
||||
window.addEventListener('DOMContentLoaded', function bootChatbotClient() {
  // Constructed once the DOM has been parsed; the instance is not kept.
  new ChatbotClient();
});
|
||||
@@ -0,0 +1,135 @@
|
||||
body {
|
||||
margin: 0;
|
||||
padding: 20px;
|
||||
font-family: Arial, sans-serif;
|
||||
background-color: #f0f0f0;
|
||||
}
|
||||
|
||||
.container {
|
||||
max-width: 1200px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
|
||||
.status-bar,
|
||||
.device-bar {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
padding: 10px;
|
||||
background-color: #fff;
|
||||
border-radius: 8px;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
.controls,
|
||||
.device-controls {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 10px; /* Adds spacing between elements */
|
||||
}
|
||||
|
||||
.device-controls {
|
||||
margin-left: auto;
|
||||
}
|
||||
|
||||
.controls button,
|
||||
.device-controls button {
|
||||
padding: 8px 16px;
|
||||
margin-left: 10px;
|
||||
border: none;
|
||||
border-radius: 4px;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
#bot-selector,
|
||||
#device-selector {
|
||||
padding: 8px 16px;
|
||||
padding-right: 40px;
|
||||
border: none;
|
||||
border-radius: 4px;
|
||||
background-color: #6c757d; /* Gray background */
|
||||
color: white; /* White text */
|
||||
cursor: pointer;
|
||||
appearance: none; /* Removes default browser styling for dropdowns */
|
||||
background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='white'%3E%3Cpath d='M7 10l5 5 5-5z'/%3E%3C/svg%3E"); /* Custom arrow */
|
||||
background-repeat: no-repeat;
|
||||
background-position: right 8px center; /* Position the arrow */
|
||||
}
|
||||
|
||||
#bot-selector:focus,
|
||||
#device-selector:focus {
|
||||
outline: none;
|
||||
box-shadow: 0 0 4px rgba(0, 0, 0, 0.3); /* Add a subtle focus effect */
|
||||
}
|
||||
|
||||
#connect-btn {
|
||||
background-color: #4caf50;
|
||||
color: white;
|
||||
}
|
||||
|
||||
#disconnect-btn {
|
||||
background-color: #f44336;
|
||||
color: white;
|
||||
}
|
||||
|
||||
#mic-toggle-btn {
|
||||
}
|
||||
|
||||
button:disabled {
|
||||
opacity: 0.5;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
.main-content {
|
||||
background-color: #fff;
|
||||
border-radius: 8px;
|
||||
padding: 20px;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
.bot-container {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
#bot-video-container {
|
||||
width: 640px;
|
||||
height: 360px;
|
||||
background-color: #e0e0e0;
|
||||
border-radius: 8px;
|
||||
margin: 20px auto;
|
||||
overflow: hidden;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
#bot-video-container video {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
object-fit: cover;
|
||||
}
|
||||
|
||||
.debug-panel {
|
||||
background-color: #fff;
|
||||
border-radius: 8px;
|
||||
padding: 20px;
|
||||
}
|
||||
|
||||
.debug-panel h3 {
|
||||
margin: 0 0 10px 0;
|
||||
font-size: 16px;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
#debug-log {
|
||||
height: 200px;
|
||||
overflow-y: auto;
|
||||
background-color: #f8f8f8;
|
||||
padding: 10px;
|
||||
border-radius: 4px;
|
||||
font-family: monospace;
|
||||
font-size: 12px;
|
||||
line-height: 1.4;
|
||||
}
|
||||
BIN
examples/deployment/modal-example/diagram.jpg
Normal file
|
After Width: | Height: | Size: 114 KiB |
@@ -1,3 +0,0 @@
|
||||
DAILY_API_KEY=
|
||||
OPENAI_API_KEY=
|
||||
CARTESIA_API_KEY=
|
||||
@@ -1,5 +0,0 @@
|
||||
python-dotenv==1.0.1
|
||||
modal==0.71.3
|
||||
pipecat-ai[daily,silero,cartesia,openai]==0.0.52
|
||||
fastapi==0.115.6
|
||||
aiohttp==3.11.11
|
||||
307
examples/deployment/modal-example/server/app.py
Normal file
@@ -0,0 +1,307 @@
|
||||
"""modal_example.
|
||||
|
||||
This module shows a simple example of how to deploy a bot using Modal and FastAPI.
|
||||
|
||||
It includes:
|
||||
- FastAPI endpoints for starting agents and checking bot statuses.
|
||||
- Dynamic loading of bot implementations.
|
||||
- Use of a Daily transport for bot communication.
|
||||
"""
|
||||
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import importlib
|
||||
import os
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import Any, Dict, Literal
|
||||
|
||||
import aiohttp
|
||||
import modal
|
||||
from fastapi import APIRouter, FastAPI, HTTPException
|
||||
from fastapi.responses import JSONResponse, RedirectResponse
|
||||
from pydantic import BaseModel
|
||||
|
||||
# container specifications for the FastAPI web server
|
||||
# Image for the FastAPI web server container: the pinned requirements plus
# pipecat with only the Daily extra, and the local "src" directory.
web_image = (
    modal.Image.debian_slim(python_version="3.13")
    .pip_install_from_requirements("requirements.txt")
    .pip_install("pipecat-ai[daily]")
    .add_local_dir("src", remote_path="/root/src")
)

# Image for the Pipecat pipeline container: adds ffmpeg and the pipecat
# extras used by the bot implementations.
bot_image = (
    modal.Image.debian_slim(python_version="3.13")
    .apt_install("ffmpeg")
    .pip_install_from_requirements("requirements.txt")
    .pip_install("pipecat-ai[daily,elevenlabs,openai,silero,google]")
    .add_local_dir("src", remote_path="/root/src")
)

# Secrets are loaded from the local dotenv file.
app = modal.App("pipecat-modal", secrets=[modal.Secret.from_dotenv()])

router = APIRouter()

# Spawned bot jobs, as (function id, room URL) tuples keyed by function id.
bot_jobs = {}
# Holds the Daily REST helper under the "rest" key once the app has started.
daily_helpers = {}

# Names of all supported bot implementations.
# Each one corresponds to a bot_<name> module in the src directory.
BotName = Literal["openai", "gemini", "vllm"]
|
||||
|
||||
|
||||
def cleanup():
    """Cancel every bot job recorded in ``bot_jobs``.

    Called during server shutdown.
    """
    # The first element of each stored entry is the Modal function call id.
    call_ids = (job[0] for job in bot_jobs.values())
    for call_id in call_ids:
        call = modal.FunctionCall.from_id(call_id)
        if not call:
            continue
        call.cancel()
|
||||
|
||||
|
||||
def get_bot_file(bot_name: BotName) -> str:
    """Map a bot name onto the module file name that implements it.

    Args:
        bot_name (BotName): The name of the bot (e.g., 'openai', 'gemini', 'vllm').
            It is lower-cased and stripped; an empty result falls back to 'openai'.

    Returns:
        str: The module file name, i.e. ``bot_<name>``.

    Raises:
        ValueError: If the normalized name is not one of the supported bots.
    """
    supported = ("openai", "gemini", "vllm")
    normalized = bot_name.lower().strip() or "openai"
    if normalized in supported:
        return f"bot_{normalized}"
    raise ValueError(
        f"Invalid BOT_IMPLEMENTATION: {normalized}. Must be 'openai' or 'gemini' or 'vllm'"
    )
|
||||
|
||||
|
||||
def get_runner(path: str, bot_file: str) -> callable:
    """Dynamically import the ``run_bot`` function of a bot module.

    Args:
        path (str): The package that contains the bot modules (e.g., 'src').
        bot_file (str): The module name of the bot implementation
            (e.g., 'bot_openai', 'bot_gemini', 'bot_vllm').

    Returns:
        function: The ``run_bot`` attribute of module ``<path>.<bot_file>``.

    Raises:
        ImportError: If the module cannot be imported or has no ``run_bot``
            attribute. The underlying exception is kept as ``__cause__``.
    """
    # Built outside the try block so the error message can always refer to it.
    module_name = f"{path}.{bot_file}"
    try:
        module = importlib.import_module(module_name)
        return getattr(module, "run_bot")
    except (ImportError, AttributeError) as e:
        # Chain explicitly so the original failure stays visible to callers.
        raise ImportError(f"Failed to import run_bot from {module_name}: {e}") from e
|
||||
|
||||
|
||||
async def create_room_and_token() -> tuple[str, str]:
    """Return a Daily room URL and a token for it, creating them if needed.

    ``DAILY_SAMPLE_ROOM_URL`` and ``DAILY_SAMPLE_ROOM_TOKEN`` are read from the
    environment. When no room URL is configured a new room is created through
    the Daily REST helper. A token is requested from the helper whenever no
    usable token is configured for the room that will be returned.

    Returns:
        tuple[str, str]: A tuple containing the room URL and the authentication token.

    Raises:
        HTTPException: If room creation or token generation fails.
    """
    from pipecat.transports.services.helpers.daily_rest import DailyRoomParams

    room_url = os.getenv("DAILY_SAMPLE_ROOM_URL", None)
    token = os.getenv("DAILY_SAMPLE_ROOM_TOKEN", None)

    if not room_url:
        room = await daily_helpers["rest"].create_room(DailyRoomParams())
        if not room.url:
            raise HTTPException(status_code=500, detail="Failed to create room")
        room_url = room.url
        # A configured token cannot belong to a room that was just created.
        token = None

    if not token:
        token = await daily_helpers["rest"].get_token(room_url)
        if not token:
            raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room_url}")

    return room_url, token
|
||||
|
||||
|
||||
@app.function(image=bot_image, min_containers=1)
async def bot_runner(room_url, token, bot_name: BotName = "openai"):
    """Run the selected bot pipeline in the given Daily room.

    Args:
        room_url (str): The URL of the Daily room where the bot and client will communicate.
        token (str): The authentication token for the room.
        bot_name (BotName): The name of the bot implementation to use. Defaults to "openai".

    Raises:
        HTTPException: If resolving, importing or running the bot fails. The
            original exception is kept as ``__cause__``.
    """
    try:
        path = "src"
        bot_file = get_bot_file(bot_name)
        run_bot = get_runner(path, bot_file)

        # The token is a credential for the room, so it is deliberately kept
        # out of the log line.
        print(f"Starting bot process: {bot_file} -u {room_url}")
        await run_bot(room_url, token)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to start bot pipeline: {e}") from e
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan manager that handles startup and shutdown tasks.

    - Creates an aiohttp session
    - Stores a Daily REST helper built on that session in ``daily_helpers["rest"]``
    - On shutdown, closes the session and cancels the recorded bot jobs
    """
    from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper

    aiohttp_session = aiohttp.ClientSession()
    daily_helpers["rest"] = DailyRESTHelper(
        daily_api_key=os.getenv("DAILY_API_KEY", ""),
        daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
        aiohttp_session=aiohttp_session,
    )
    try:
        yield
    finally:
        # Runs even when an exception is thrown into the generator at the
        # yield, so the session is not leaked and cleanup() is not skipped.
        await aiohttp_session.close()
        cleanup()
|
||||
|
||||
|
||||
class ConnectData(BaseModel):
    """Request payload that selects which bot pipeline to launch.

    Attributes:
        bot_name (BotName): The bot implementation to connect to; "openai"
            when the client does not specify one.
    """

    bot_name: BotName = "openai"
|
||||
|
||||
|
||||
async def start(data: ConnectData):
    """Internal method to start a bot agent and return the room URL and token.

    Obtains a room and token, spawns the deployed ``bot_runner`` Modal function
    for them, and records the spawned call in ``bot_jobs``.

    Args:
        data (ConnectData): The data containing the bot name to use.

    Returns:
        tuple[str, str]: A tuple containing the room URL and token.
    """
    room_url, token = await create_room_and_token()
    launch_bot_func = modal.Function.from_name("pipecat-modal", "bot_runner")
    function_call = launch_bot_func.spawn(room_url, token, data.bot_name)

    # spawn() returns a FunctionCall handle. Store its string id, which is
    # what cleanup() later passes to modal.FunctionCall.from_id().
    function_id = function_call.object_id
    bot_jobs[function_id] = (function_id, room_url)

    return room_url, token
|
||||
|
||||
|
||||
@router.get("/")
async def start_agent():
    """Launch the default bot and redirect the caller to its room.

    The bot implementation is taken from the ``BOT_IMPLEMENTATION`` environment
    variable (default "openai"). Once the bot agent has been started the caller
    is redirected to the room URL.

    Returns:
        RedirectResponse: A response that redirects to the room URL.
    """
    configured_bot = os.getenv("BOT_IMPLEMENTATION", "openai").lower().strip()
    print(f"Starting bot: {configured_bot}")

    # Only the room URL is needed for the redirect; the token is unused here.
    room_url, _token = await start(ConnectData(bot_name=configured_bot))
    return RedirectResponse(room_url)
|
||||
|
||||
|
||||
@router.post("/connect")
async def rtvi_connect(data: ConnectData) -> Dict[Any, Any]:
    """A user endpoint for launching a bot agent and retrieving the room/token credentials.

    The bot implementation comes from the request when provided, otherwise from
    the ``BOT_IMPLEMENTATION`` environment variable (default "openai"). The bot
    agent is started and its room URL and token are returned so the client can
    connect with its own RTVI interface.

    Args:
        data (ConnectData): Optional. The data containing the bot name to use.

    Returns:
        Dict[Any, Any]: A dictionary containing the room URL and token.
    """
    # Resolve the fallback before touching data.bot_name: data may be None,
    # and attributes cannot be assigned on None.
    if data is None or not data.bot_name:
        fallback = os.getenv("BOT_IMPLEMENTATION", "openai").lower().strip()
        data = ConnectData(bot_name=fallback)

    print(f"Starting bot: {data.bot_name}")
    room_url, token = await start(data)

    return {"room_url": room_url, "token": token}
|
||||
|
||||
|
||||
@router.get("/status/{fid}")
def get_status(fid: str):
    """Report the status of a bot process identified by its function ID.

    Args:
        fid (str): The function ID of the bot process.

    Returns:
        JSONResponse: A JSON response containing the bot's status and result code.

    Raises:
        HTTPException: If the bot process with the given ID is not found.
    """
    call = modal.FunctionCall.from_id(fid)
    if not call:
        raise HTTPException(status_code=404, detail=f"Bot with process id: {fid} not found")

    try:
        # timeout=0: poll for a result without waiting for the call to finish.
        outcome = call.get(timeout=0)
    except modal.exception.OutputExpiredError:
        payload = {"bot_id": fid, "status": "finished", "code": 404}
    except TimeoutError:
        payload = {"bot_id": fid, "status": "running", "code": 202}
    else:
        payload = {"bot_id": fid, "status": "finished", "code": outcome}

    return JSONResponse(payload)
|
||||
|
||||
|
||||
@app.function(image=web_image, min_containers=1)
@modal.concurrent(max_inputs=1)
@modal.asgi_app()
def fastapi_app():
    """Build the FastAPI application served as a Modal ASGI app.

    The app is created with the ``lifespan`` manager, gets a CORS middleware
    that allows every origin, method and header, and mounts the module-level
    ``router``.
    """
    from fastapi.middleware.cors import CORSMiddleware

    cors_options = {
        "allow_origins": ["*"],
        "allow_credentials": True,
        "allow_methods": ["*"],
        "allow_headers": ["*"],
    }

    web_app = FastAPI(lifespan=lifespan)
    web_app.add_middleware(CORSMiddleware, **cors_options)

    # Mount the endpoints registered on this module's router.
    web_app.include_router(router)

    return web_app
|
||||
14
examples/deployment/modal-example/server/env.example
Normal file
@@ -0,0 +1,14 @@
|
||||
DAILY_API_KEY=
|
||||
|
||||
# determines which bot file to default to: 'openai', 'gemini', or 'vllm'
|
||||
BOT_IMPLEMENTATION=openai
|
||||
|
||||
# needed for the openai bot pipeline
|
||||
OPENAI_API_KEY=
|
||||
ELEVENLABS_API_KEY=
|
||||
|
||||
# needed for the gemini live bot pipeline
|
||||
GOOGLE_API_KEY=
|
||||
|
||||
# needed if you modified the API Key for your self-hosted LLM
|
||||
VLLM_API_KEY=
|
||||
@@ -0,0 +1,2 @@
|
||||
python-dotenv==1.0.1
|
||||
modal==0.71.3
|
||||
BIN
examples/deployment/modal-example/server/src/assets/robot01.png
Normal file
|
After Width: | Height: | Size: 759 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot010.png
Normal file
|
After Width: | Height: | Size: 884 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot011.png
Normal file
|
After Width: | Height: | Size: 876 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot012.png
Normal file
|
After Width: | Height: | Size: 881 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot013.png
Normal file
|
After Width: | Height: | Size: 866 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot014.png
Normal file
|
After Width: | Height: | Size: 874 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot015.png
Normal file
|
After Width: | Height: | Size: 882 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot016.png
Normal file
|
After Width: | Height: | Size: 885 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot017.png
Normal file
|
After Width: | Height: | Size: 888 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot018.png
Normal file
|
After Width: | Height: | Size: 890 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot019.png
Normal file
|
After Width: | Height: | Size: 898 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot02.png
Normal file
|
After Width: | Height: | Size: 836 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot020.png
Normal file
|
After Width: | Height: | Size: 903 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot021.png
Normal file
|
After Width: | Height: | Size: 908 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot022.png
Normal file
|
After Width: | Height: | Size: 908 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot023.png
Normal file
|
After Width: | Height: | Size: 905 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot024.png
Normal file
|
After Width: | Height: | Size: 903 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot025.png
Normal file
|
After Width: | Height: | Size: 866 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot03.png
Normal file
|
After Width: | Height: | Size: 849 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot04.png
Normal file
|
After Width: | Height: | Size: 866 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot05.png
Normal file
|
After Width: | Height: | Size: 866 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot06.png
Normal file
|
After Width: | Height: | Size: 864 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot07.png
Normal file
|
After Width: | Height: | Size: 858 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot08.png
Normal file
|
After Width: | Height: | Size: 875 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot09.png
Normal file
|
After Width: | Height: | Size: 881 KiB |
198
examples/deployment/modal-example/server/src/bot_gemini.py
Normal file
@@ -0,0 +1,198 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Gemini Bot Implementation.
|
||||
|
||||
This module implements a chatbot using Google's Gemini Multimodal Live model.
|
||||
It includes:
|
||||
- Real-time audio/video interaction through Daily
|
||||
- Animated robot avatar
|
||||
- Speech-to-speech model
|
||||
|
||||
The bot runs as part of a pipeline that processes audio/video frames and manages
|
||||
the conversation flow using Gemini's streaming capabilities.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from PIL import Image
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import (
|
||||
BotStartedSpeakingFrame,
|
||||
BotStoppedSpeakingFrame,
|
||||
Frame,
|
||||
OutputImageRawFrame,
|
||||
SpriteFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIObserver, RTVIProcessor
|
||||
from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)

# Swap the handler with id 0 for a DEBUG-level stderr handler. A ValueError
# here is taken to mean the logger was already configured, and is ignored.
try:
    logger.remove(0)
    logger.add(sys.stderr, level="DEBUG")
except ValueError:
    pass

sprites = []
script_dir = os.path.dirname(__file__)

# Load the 25 sequential animation frames. File names are "robot0" followed
# by the frame number (robot01.png ... robot025.png).
for frame_number in range(1, 26):
    sprite_path = os.path.join(script_dir, f"assets/robot0{frame_number}.png")
    # Open the image and convert it to raw bytes
    with Image.open(sprite_path) as sprite:
        sprites.append(
            OutputImageRawFrame(image=sprite.tobytes(), size=sprite.size, format=sprite.format)
        )

# Append the frames in reverse so the animation plays forward, then backward
flipped = sprites[::-1]
sprites += flipped

# Static frame shown while the bot is listening
quiet_frame = sprites[0]
# Animation sequence shown while the bot is talking
talking_frame = SpriteFrame(images=sprites)
|
||||
|
||||
|
||||
class TalkingAnimation(FrameProcessor):
    """Switches the bot's avatar between its listening and talking visuals.

    Pushes the animated sprite when the bot starts speaking and the static
    frame when it stops; every incoming frame is forwarded afterwards.
    """

    def __init__(self):
        super().__init__()
        # True while the talking animation is the last visual that was pushed.
        self._is_talking = False

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Update the animation state for a frame, then forward the frame.

        Args:
            frame: The incoming frame to process
            direction: The direction of frame flow in the pipeline
        """
        await super().process_frame(frame, direction)

        started = isinstance(frame, BotStartedSpeakingFrame)
        stopped = not started and isinstance(frame, BotStoppedSpeakingFrame)

        if started and not self._is_talking:
            # First "started" since the last stop: begin the animation.
            await self.push_frame(talking_frame)
            self._is_talking = True
        if stopped:
            # Back to the static frame once the bot stops speaking.
            await self.push_frame(quiet_frame)
            self._is_talking = False

        await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def run_bot(room_url: str, token: str):
    """Build and run the Gemini bot pipeline in the given Daily room.

    Wires together:
    - a Daily transport with audio/camera output and Silero VAD
    - the Gemini Multimodal Live service
    - a context aggregator seeded with the opening prompt
    - the talking animation processor
    - RTVI event handling

    Args:
        room_url: URL of the Daily room to join.
        token: Token passed to the Daily transport for that room.
    """
    # Daily transport: audio + 1024x576 camera output, VAD with passthrough.
    vad_analyzer = SileroVADAnalyzer(params=VADParams(stop_secs=0.5))
    daily_params = DailyParams(
        audio_out_enabled=True,
        camera_out_enabled=True,
        camera_out_width=1024,
        camera_out_height=576,
        vad_enabled=True,
        vad_audio_passthrough=True,
        vad_analyzer=vad_analyzer,
    )
    transport = DailyTransport(room_url, token, "Chatbot", daily_params)

    # Gemini Multimodal Live model
    llm = GeminiMultimodalLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        voice_id="Puck",  # Aoede, Charon, Fenrir, Kore, Puck
        transcribe_user_audio=True,
    )

    # The opening instruction is sent with the "user" role.
    opening_prompt = "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by introducing yourself."
    messages = [{"role": "user", "content": opening_prompt}]

    # Conversation context plus the aggregator pair used in the pipeline.
    context = OpenAILLMContext(messages)
    context_aggregator = llm.create_context_aggregator(context)

    ta = TalkingAnimation()

    # RTVI events for Pipecat client UI
    rtvi = RTVIProcessor(config=RTVIConfig(config=[]))

    pipeline = Pipeline(
        [
            transport.input(),
            rtvi,
            context_aggregator.user(),
            llm,
            ta,
            transport.output(),
            context_aggregator.assistant(),
        ]
    )

    task_params = PipelineParams(
        allow_interruptions=True,
        enable_metrics=True,
        enable_usage_metrics=True,
    )
    task = PipelineTask(pipeline, params=task_params, observers=[RTVIObserver(rtvi)])

    # Queue the static avatar frame before anything else.
    await task.queue_frame(quiet_frame)

    @rtvi.event_handler("on_client_ready")
    async def on_client_ready(rtvi):
        await rtvi.set_bot_ready()
        # Kick off the conversation
        await task.queue_frames([context_aggregator.user().get_context_frame()])

    @transport.event_handler("on_first_participant_joined")
    async def on_first_participant_joined(transport, participant):
        await transport.capture_participant_transcription(participant["id"])

    @transport.event_handler("on_participant_left")
    async def on_participant_left(transport, participant, reason):
        print(f"Participant left: {participant}")
        # The task is cancelled as soon as a participant leaves.
        await task.cancel()

    runner = PipelineRunner()
    await runner.run(task)
|
||||
226
examples/deployment/modal-example/server/src/bot_openai.py
Normal file
@@ -0,0 +1,226 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""OpenAI Bot Implementation.
|
||||
|
||||
This module implements a chatbot using OpenAI's GPT-4 model for natural language
|
||||
processing. It includes:
|
||||
- Real-time audio/video interaction through Daily
|
||||
- Animated robot avatar
|
||||
- Text-to-speech using ElevenLabs
|
||||
- Support for both English and Spanish
|
||||
|
||||
The bot runs as part of a pipeline that processes audio/video frames and manages
|
||||
the conversation flow.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from PIL import Image
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import (
|
||||
BotStartedSpeakingFrame,
|
||||
BotStoppedSpeakingFrame,
|
||||
Frame,
|
||||
OutputImageRawFrame,
|
||||
SpriteFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIObserver, RTVIProcessor
|
||||
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)

# Swap the handler with id 0 for a DEBUG-level stderr handler. A ValueError
# here is taken to mean the logger was already configured, and is ignored.
try:
    logger.remove(0)
    logger.add(sys.stderr, level="DEBUG")
except ValueError:
    pass

sprites = []
script_dir = os.path.dirname(__file__)

# Load the 25 sequential animation frames. File names are "robot0" followed
# by the frame number (robot01.png ... robot025.png).
for frame_number in range(1, 26):
    sprite_path = os.path.join(script_dir, f"assets/robot0{frame_number}.png")
    # Open the image and convert it to raw bytes
    with Image.open(sprite_path) as sprite:
        sprites.append(
            OutputImageRawFrame(image=sprite.tobytes(), size=sprite.size, format=sprite.format)
        )

# Append the frames in reverse so the animation plays forward, then backward
flipped = sprites[::-1]
sprites += flipped

# Static frame shown while the bot is listening
quiet_frame = sprites[0]
# Animation sequence shown while the bot is talking
talking_frame = SpriteFrame(images=sprites)
|
||||
|
||||
|
||||
class TalkingAnimation(FrameProcessor):
    """Frame processor that toggles the robot avatar between idle and talking.

    When a ``BotStartedSpeakingFrame`` arrives the animated sprite sequence is
    pushed; when a ``BotStoppedSpeakingFrame`` arrives the static frame is
    pushed. Every incoming frame is forwarded afterwards.
    """

    def __init__(self):
        super().__init__()
        # True while the talking animation is the most recently pushed visual.
        self._is_talking = False

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Update the animation state for ``frame`` and pass the frame along.

        Args:
            frame: The frame that just reached this processor.
            direction: The direction in which the frame is travelling.
        """
        await super().process_frame(frame, direction)

        bot_started = isinstance(frame, BotStartedSpeakingFrame)

        if bot_started and not self._is_talking:
            # Only push the animation once per speaking turn.
            await self.push_frame(talking_frame)
            self._is_talking = True
        elif not bot_started and isinstance(frame, BotStoppedSpeakingFrame):
            # Bot went quiet: fall back to the static image.
            await self.push_frame(quiet_frame)
            self._is_talking = False

        # The triggering frame itself always continues in its own direction.
        await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def run_bot(room_url: str, token: str):
    """Assemble the chatbot pipeline for a Daily room and run it.

    The pipeline chains, in order: Daily transport input, the RTVI processor,
    the user context aggregator, the OpenAI LLM service, ElevenLabs
    text-to-speech, the talking animation, Daily transport output and the
    assistant context aggregator.

    Args:
        room_url: URL of the Daily room handed to the transport.
        token: Token handed to the transport together with the room URL.
    """
    # Transport configuration: audio out, 1024x576 camera out, Silero VAD and
    # transcription enabled.
    daily_params = DailyParams(
        audio_out_enabled=True,
        camera_out_enabled=True,
        camera_out_width=1024,
        camera_out_height=576,
        vad_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
        transcription_enabled=True,
        #
        # Spanish
        #
        # transcription_settings=DailyTranscriptionSettings(
        #     language="es",
        #     tier="nova",
        #     model="2-general"
        # )
    )
    transport = DailyTransport(room_url, token, "Chatbot", daily_params)

    # Text-to-speech (English voice by default).
    tts = ElevenLabsTTSService(
        api_key=os.getenv("ELEVENLABS_API_KEY"),
        voice_id="SAz9YHcvj6GT2YYXdXww",
        #
        # Spanish
        #
        # model="eleven_multilingual_v2",
        # voice_id="gD1IexrzCvsXPHUuT0s3",
    )

    # Language model.
    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

    # The single system message that seeds the conversation.
    system_message = {
        "role": "system",
        # English
        "content": "You are an incessant one-upper. Start by asking the user how their day is going.",
        #
        # Spanish
        #
        # "content": "Eres Chatbot, un amigable y útil robot. Tu objetivo es demostrar tus capacidades de una manera breve. Tus respuestas se convertiran a audio así que nunca no debes incluir caracteres especiales. Contesta a lo que el usuario pregunte de una manera creativa, útil y breve. Empieza por presentarte a ti mismo.",
    }
    messages = [system_message]

    # Conversation context plus the aggregator pair (user / assistant) built
    # from it by the LLM service.
    context = OpenAILLMContext(messages)
    context_aggregator = llm.create_context_aggregator(context)

    animation = TalkingAnimation()

    # RTVI events for the Pipecat client UI.
    rtvi = RTVIProcessor(config=RTVIConfig(config=[]))

    processors = [
        transport.input(),
        rtvi,
        context_aggregator.user(),
        llm,
        tts,
        animation,
        transport.output(),
        context_aggregator.assistant(),
    ]
    pipeline = Pipeline(processors)

    task_params = PipelineParams(
        allow_interruptions=True,
        enable_metrics=True,
        enable_usage_metrics=True,
    )
    task = PipelineTask(pipeline, params=task_params, observers=[RTVIObserver(rtvi)])

    # Queue the static avatar frame before the pipeline starts running.
    await task.queue_frame(quiet_frame)

    @rtvi.event_handler("on_client_ready")
    async def handle_client_ready(rtvi):
        await rtvi.set_bot_ready()
        # Kick off the conversation with the current context.
        await task.queue_frames([context_aggregator.user().get_context_frame()])

    @transport.event_handler("on_first_participant_joined")
    async def handle_first_participant_joined(transport, participant):
        await transport.capture_participant_transcription(participant["id"])

    @transport.event_handler("on_participant_left")
    async def handle_participant_left(transport, participant, reason):
        print(f"Participant left: {participant}")
        await task.cancel()

    await PipelineRunner().run(task)
|
||||
239
examples/deployment/modal-example/server/src/bot_vllm.py
Normal file
@@ -0,0 +1,239 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""vLLM Bot Implementation.
|
||||
|
||||
This module implements a chatbot using a Llama 3.1 model served through a vLLM OpenAI-compatible endpoint on Modal for natural language
|
||||
processing. It includes:
|
||||
- Real-time audio/video interaction through Daily
|
||||
- Animated robot avatar
|
||||
- Text-to-speech using ElevenLabs
|
||||
- Support for both English and Spanish
|
||||
|
||||
The bot runs as part of a pipeline that processes audio/video frames and manages
|
||||
the conversation flow.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import List
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionMessageParam
|
||||
from PIL import Image
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import (
|
||||
BotStartedSpeakingFrame,
|
||||
BotStoppedSpeakingFrame,
|
||||
Frame,
|
||||
OutputImageRawFrame,
|
||||
SpriteFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIObserver, RTVIProcessor
|
||||
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
try:
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
except ValueError:
|
||||
# Handle the case where logger is already initialized
|
||||
pass
|
||||
|
||||
# REPLACE WITH YOUR MODAL URL ENDPOINT
|
||||
modal_url = "https://<Modal workspace>--example-vllm-openai-compatible-serve.modal.run"
|
||||
api_key = os.getenv("VLLM_API_KEY", "super-secret-key")
|
||||
|
||||
|
||||
sprites = []
|
||||
script_dir = os.path.dirname(__file__)
|
||||
|
||||
# Load sequential animation frames
|
||||
for i in range(1, 26):
|
||||
# Build the full path to the image file
|
||||
full_path = os.path.join(script_dir, f"assets/robot0{i}.png")
|
||||
# Get the filename without the extension to use as the dictionary key
|
||||
# Open the image and convert it to bytes
|
||||
with Image.open(full_path) as img:
|
||||
sprites.append(OutputImageRawFrame(image=img.tobytes(), size=img.size, format=img.format))
|
||||
|
||||
# Create a smooth animation by adding reversed frames
|
||||
flipped = sprites[::-1]
|
||||
sprites.extend(flipped)
|
||||
|
||||
# Define static and animated states
|
||||
quiet_frame = sprites[0] # Static frame for when bot is listening
|
||||
talking_frame = SpriteFrame(images=sprites) # Animation sequence for when bot is talking
|
||||
|
||||
|
||||
class TalkingAnimation(FrameProcessor):
    """Frame processor that toggles the robot avatar between idle and talking.

    When a ``BotStartedSpeakingFrame`` arrives the animated sprite sequence is
    pushed; when a ``BotStoppedSpeakingFrame`` arrives the static frame is
    pushed. Every incoming frame is forwarded afterwards.
    """

    def __init__(self):
        super().__init__()
        # True while the talking animation is the most recently pushed visual.
        self._is_talking = False

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Update the animation state for ``frame`` and pass the frame along.

        Args:
            frame: The frame that just reached this processor.
            direction: The direction in which the frame is travelling.
        """
        await super().process_frame(frame, direction)

        bot_started = isinstance(frame, BotStartedSpeakingFrame)

        if bot_started and not self._is_talking:
            # Only push the animation once per speaking turn.
            await self.push_frame(talking_frame)
            self._is_talking = True
        elif not bot_started and isinstance(frame, BotStoppedSpeakingFrame):
            # Bot went quiet: fall back to the static image.
            await self.push_frame(quiet_frame)
            self._is_talking = False

        # The triggering frame itself always continues in its own direction.
        await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def run_bot(room_url: str, token: str):
    """Assemble the vLLM-backed chatbot pipeline for a Daily room and run it.

    The pipeline chains, in order: Daily transport input, the RTVI processor,
    the user context aggregator, the LLM service (pointed at the
    ``modal_url`` endpoint), ElevenLabs text-to-speech, the talking
    animation, Daily transport output and the assistant context aggregator.

    Args:
        room_url: URL of the Daily room handed to the transport.
        token: Token handed to the transport together with the room URL.
    """
    # Transport configuration: audio out, 1024x576 camera out, Silero VAD and
    # transcription enabled.
    daily_params = DailyParams(
        audio_out_enabled=True,
        camera_out_enabled=True,
        camera_out_width=1024,
        camera_out_height=576,
        vad_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
        transcription_enabled=True,
        #
        # Spanish
        #
        # transcription_settings=DailyTranscriptionSettings(
        #     language="es",
        #     tier="nova",
        #     model="2-general"
        # )
    )
    transport = DailyTransport(room_url, token, "Chatbot", daily_params)

    # Text-to-speech (English voice by default).
    tts = ElevenLabsTTSService(
        api_key=os.getenv("ELEVENLABS_API_KEY"),
        voice_id="D38z5RcWu1voky8WS1ja",
        #
        # Spanish
        #
        # model="eleven_multilingual_v2",
        # voice_id="gD1IexrzCvsXPHUuT0s3",
    )

    # Language model: the OpenAI-style service class is reused, but the key,
    # model name and base URL all target the endpoint at ``modal_url``.
    llm = OpenAILLMService(
        api_key=api_key,
        model="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
        base_url=f"{modal_url}/v1",
    )

    # The single system message that seeds the conversation.
    system_message = {
        "role": "system",
        # English
        "content": "You are a salesman for Modal, the cloud-native serverless Python computing platform.",
        #
        # Spanish
        #
        # "content": "Eres Chatbot, un amigable y útil robot. Tu objetivo es demostrar tus capacidades de una manera breve. Tus respuestas se convertiran a audio así que nunca no debes incluir caracteres especiales. Contesta a lo que el usuario pregunte de una manera creativa, útil y breve. Empieza por presentarte a ti mismo.",
    }
    messages = [system_message]

    # Conversation context plus the aggregator pair (user / assistant) built
    # from it by the LLM service.
    context = OpenAILLMContext(messages)
    context_aggregator = llm.create_context_aggregator(context)

    animation = TalkingAnimation()

    # RTVI events for the Pipecat client UI.
    rtvi = RTVIProcessor(config=RTVIConfig(config=[]))

    processors = [
        transport.input(),
        rtvi,
        context_aggregator.user(),
        llm,
        tts,
        animation,
        transport.output(),
        context_aggregator.assistant(),
    ]
    pipeline = Pipeline(processors)

    task_params = PipelineParams(
        allow_interruptions=True,
        enable_metrics=True,
        enable_usage_metrics=True,
    )
    task = PipelineTask(pipeline, params=task_params, observers=[RTVIObserver(rtvi)])

    # Queue the static avatar frame before the pipeline starts running.
    await task.queue_frame(quiet_frame)

    @rtvi.event_handler("on_client_ready")
    async def handle_client_ready(rtvi):
        await rtvi.set_bot_ready()
        # Kick off the conversation with the current context.
        await task.queue_frames([context_aggregator.user().get_context_frame()])

    @transport.event_handler("on_first_participant_joined")
    async def handle_first_participant_joined(transport, participant):
        await transport.capture_participant_transcription(participant["id"])

    @transport.event_handler("on_participant_left")
    async def handle_participant_left(transport, participant, reason):
        print(f"Participant left: {participant}")
        await task.cancel()

    await PipelineRunner().run(task)
|
||||
84
examples/deployment/modal-example/server/src/runner.py
Normal file
@@ -0,0 +1,84 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import importlib
|
||||
import os
|
||||
|
||||
|
||||
def get_bot_file(arg_bot: str | None) -> str:
|
||||
bot_implementation = arg_bot or os.getenv("BOT_IMPLEMENTATION", "openai").lower().strip()
|
||||
if not bot_implementation:
|
||||
bot_implementation = "openai"
|
||||
if bot_implementation not in ["openai", "gemini", "vllm"]:
|
||||
raise ValueError(
|
||||
f"Invalid BOT_IMPLEMENTATION: {bot_implementation}. Must be 'openai' or 'gemini'"
|
||||
)
|
||||
return f"bot_{bot_implementation}"
|
||||
|
||||
|
||||
def get_runner(bot_file: str):
    """Dynamically import a bot module and return its ``run_bot`` function.

    Args:
        bot_file (str): Name of the module to import (e.g., 'bot_openai').

    Returns:
        function: The ``run_bot`` attribute of the imported module.

    Raises:
        ImportError: If the module cannot be imported or does not define
            ``run_bot``. The underlying error is attached as ``__cause__``.
    """
    try:
        module = importlib.import_module(bot_file)
        return module.run_bot
    except (ImportError, AttributeError) as e:
        # Chain explicitly so the original failure stays visible in tracebacks.
        raise ImportError(f"Failed to import run_bot from {bot_file}: {e}") from e
|
||||
|
||||
|
||||
def main():
    """Read room, token and bot options, then run the selected bot.

    Command-line values win over the ``DAILY_SAMPLE_ROOM_URL`` and
    ``DAILY_SAMPLE_ROOM_TOKEN`` environment variables. A missing room URL is
    an error; the token is passed through as-is.
    """
    parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
    parser.add_argument(
        "-u", "--url", type=str, required=False, help="URL of the Daily room to join"
    )
    parser.add_argument("-t", "--token", type=str, required=False, help="Daily room token")
    parser.add_argument(
        "-b", "--bot", type=str, required=False, help="Bot runner to use (e.g., openai, gemini)"
    )

    # Arguments the parser does not recognize are tolerated and discarded.
    args, _ = parser.parse_known_args()

    url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL")
    token = args.token or os.getenv("DAILY_SAMPLE_ROOM_TOKEN")
    bot_file = get_bot_file(args.bot)

    if not url:
        raise Exception(
            "No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
        )

    # Resolve the bot's entry point first, then drive it on a fresh event loop.
    bot_entry_point = get_runner(bot_file)
    asyncio.run(bot_entry_point(url, token))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -100,7 +100,28 @@ phone numbers with valid values for your use case.
|
||||
|
||||
### Dialin Request
|
||||
|
||||
The server will receive a request when a call is received from Daily.
|
||||
The server will receive a request when a call is received from Daily.
|
||||
The payload that the webhook received is as follows:
|
||||
```json
|
||||
{
|
||||
// for dial-in from webhook
|
||||
"To": "+14152251493",
|
||||
"From": "+14158483432",
|
||||
"callId": "string-contains-uuid",
|
||||
"callDomain": "string-contains-uuid",
|
||||
"sipHeaders": {
|
||||
"X-My-Custom-Header": "value",
|
||||
"x-caller": "+1234567890",
|
||||
"x-called": "+1987654321",
|
||||
},
|
||||
}
|
||||
```
|
||||
The `To`, `From`, `callId`, `callDomain` fields are converted to
|
||||
`snake_case` and mapped to `dialin_settings`. In addition, `sipHeaders`
|
||||
contains any custom SIP headers received by Daily on the SIP
|
||||
interconnect address (`sip_uri`). These are headers sent from
|
||||
Twilio or other external SIP platforms, for example, to send the
|
||||
caller's phone number.
|
||||
|
||||
### Dialout Request
|
||||
|
||||
@@ -158,6 +179,7 @@ curl -X POST http://localhost:3000/api/dial \
|
||||
"From": "+1987654321",
|
||||
"callId": "call-uuid-123",
|
||||
"callDomain": "domain-uuid-456",
|
||||
"sipHeaders": {},
|
||||
"dialout_settings": [
|
||||
{
|
||||
"phoneNumber": "+1234567890",
|
||||
|
||||
@@ -39,6 +39,11 @@ class RoomRequest(BaseModel):
|
||||
None, description="A flag to perform voicemail or answeing-machine detection"
|
||||
)
|
||||
call_transfer: Optional[Dict[str, Any]] = Field(None, description="to initiate a call transfer")
|
||||
sipHeaders: Optional[Dict[str, Any]] = Field(
|
||||
None,
|
||||
alias="sip_headers",
|
||||
description="Custom SIP headers received from the external SIP provider",
|
||||
)
|
||||
|
||||
class Config:
|
||||
populate_by_name = True
|
||||
@@ -57,6 +62,14 @@ class RoomRequest(BaseModel):
|
||||
"callDomain": "string-contains-uuid"
|
||||
These need to be remapped to dialin_settings
|
||||
|
||||
In addition, we may receive SIP headers in the body that can be
|
||||
sent to the bot as a custom field, sip_headers
|
||||
"sipHeaders": {
|
||||
"X-My-Custom-Header": "value",
|
||||
"x-caller": "+14158483432",
|
||||
"x-called": "+14152251493",
|
||||
},
|
||||
|
||||
"dialout_settings": [
|
||||
{"phoneNumber": "+14158483432", "callerId": "+14152251493"},
|
||||
{"sipUri": "sip:username@sip.hostname"}
|
||||
@@ -157,6 +170,7 @@ async def dial(request: RoomRequest, raw_request: Request):
|
||||
"dialout_settings": request.dialout_settings,
|
||||
"voicemail_detection": request.voicemail_detection,
|
||||
"call_transfer": request.call_transfer,
|
||||
"sip_headers": request.sipHeaders, # passing the SIP headers to the bot
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -65,6 +65,7 @@ export default async function handler(req, res) {
|
||||
From,
|
||||
callId,
|
||||
callDomain,
|
||||
sipHeaders,
|
||||
dialout_settings,
|
||||
voicemail_detection,
|
||||
call_transfer
|
||||
@@ -117,6 +118,7 @@ export default async function handler(req, res) {
|
||||
dialout_settings,
|
||||
voicemail_detection,
|
||||
call_transfer,
|
||||
sip_headers: sipHeaders,
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
@@ -21,44 +22,23 @@ from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
# Check if we're in local development mode
|
||||
LOCAL_RUN = os.getenv("LOCAL_RUN")
|
||||
if LOCAL_RUN:
|
||||
import asyncio
|
||||
import webbrowser
|
||||
|
||||
try:
|
||||
from local_runner import configure
|
||||
except ImportError:
|
||||
logger.error("Could not import local_runner module. Local development mode may not work.")
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv(override=True)
|
||||
|
||||
# Check if we're in local development mode
|
||||
LOCAL_RUN = os.getenv("LOCAL_RUN")
|
||||
|
||||
async def main(room_url: str, token: str):
|
||||
|
||||
async def main(transport: DailyTransport):
|
||||
"""Main pipeline setup and execution function.
|
||||
|
||||
Args:
|
||||
room_url: The Daily room URL
|
||||
token: The Daily room token
|
||||
transport: The DailyTransport object for the bot
|
||||
"""
|
||||
logger.debug("Starting bot in room: {}", room_url)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
logger.debug("Starting bot")
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"), voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22"
|
||||
api_key=os.getenv("CARTESIA_API_KEY"), voice_id="71a7ad14-091c-4e8e-a314-022ece01c121"
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
@@ -126,10 +106,25 @@ async def bot(args: DailySessionArguments):
|
||||
body: The configuration object from the request body
|
||||
session_id: The session ID for logging
|
||||
"""
|
||||
from pipecat.audio.filters.krisp_filter import KrispFilter
|
||||
|
||||
logger.info(f"Bot process initialized {args.room_url} {args.token}")
|
||||
|
||||
transport = DailyTransport(
|
||||
args.room_url,
|
||||
args.token,
|
||||
"Pipecat Bot",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_in_filter=None if LOCAL_RUN else KrispFilter(),
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
try:
|
||||
await main(args.room_url, args.token)
|
||||
await main(transport)
|
||||
logger.info("Bot process completed")
|
||||
except Exception as e:
|
||||
logger.exception(f"Error in bot process: {str(e)}")
|
||||
@@ -137,18 +132,27 @@ async def bot(args: DailySessionArguments):
|
||||
|
||||
|
||||
# Local development functions
|
||||
async def local_main():
|
||||
async def local_daily():
|
||||
"""Function for local development testing."""
|
||||
from local_runner import configure
|
||||
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
logger.warning("_")
|
||||
logger.warning("_")
|
||||
logger.warning(f"Talk to your voice agent here: {room_url}")
|
||||
logger.warning("_")
|
||||
logger.warning("_")
|
||||
webbrowser.open(room_url)
|
||||
await main(room_url, token)
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Pipecat Bot",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
await main(transport)
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(f"Error in local development mode: {e}")
|
||||
|
||||
@@ -156,6 +160,6 @@ async def local_main():
|
||||
# Local development entry point
|
||||
if LOCAL_RUN and __name__ == "__main__":
|
||||
try:
|
||||
asyncio.run(local_main())
|
||||
asyncio.run(local_daily())
|
||||
except Exception as e:
|
||||
logger.exception(f"Failed to run in local mode: {e}")
|
||||
|
||||
@@ -1,2 +1,4 @@
|
||||
CARTESIA_API_KEY=
|
||||
OPENAI_API_KEY=
|
||||
OPENAI_API_KEY=
|
||||
# Local dev only
|
||||
DAILY_API_KEY=
|
||||
@@ -7,6 +7,7 @@
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
from fastapi import HTTPException
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
|
||||
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
agent_name = "my-first-agent"
|
||||
image = "your-username/my-first-agent:0.1"
|
||||
image_credentials = "your-dockerhub-creds"
|
||||
secret_set = "my-first-agent-secrets"
|
||||
enable_krisp = true
|
||||
|
||||
[scaling]
|
||||
min_instances = 0
|
||||
|
||||
51
examples/fal-smart-turn/.gitignore
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
build/
|
||||
dist/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
.pytest_cache/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# JavaScript/Node.js
|
||||
node_modules/
|
||||
dist/
|
||||
dist-ssr/
|
||||
*.local
|
||||
.env.local
|
||||
.env.development.local
|
||||
.env.test.local
|
||||
.env.production.local
|
||||
|
||||
# Logs
|
||||
logs/
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
pnpm-debug.log*
|
||||
|
||||
# Editor/IDE
|
||||
.vscode/*
|
||||
!.vscode/extensions.json
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
.DS_Store
|
||||
|
||||
# Project specific
|
||||
runpod.toml
|
||||
152
examples/fal-smart-turn/README.md
Normal file
@@ -0,0 +1,152 @@
|
||||
# Smart Turn Detection Demo
|
||||
|
||||
This demo showcases Pipecat's Smart Turn Detection feature - an advanced conversational turn detection system that uses machine learning to identify when a speaker has finished their turn in a conversation. Unlike basic Voice Activity Detection (VAD) which only detects speech vs. silence, Smart Turn detects natural conversational cues like intonation patterns, pacing, and linguistic signals.
|
||||
|
||||
This demo uses the [pipecat-ai/smart-turn](https://huggingface.co/pipecat-ai/smart-turn) model - an open-source, community-driven conversational turn detection model designed to provide more natural turn-taking in voice interactions. The model is being hosted on Fal's infrastructure for GPU acceleration, offering inference times between 50-60ms.
|
||||
|
||||
In the client UI, you can see the transcription messages along with the smart-turn model's prediction results in real-time.
|
||||
|
||||
## Try the demo
|
||||
|
||||
Try the hosted version of the demo here: https://pcc-smart-turn.vercel.app/.
|
||||
|
||||
## Run the demo locally
|
||||
|
||||
### Run the Server
|
||||
|
||||
1. Set up and activate your virtual environment:
|
||||
|
||||
```bash
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate # On Windows: venv\Scripts\activate
|
||||
```
|
||||
|
||||
2. Install dependencies:
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
3. Create your .env file and set your env vars:
|
||||
|
||||
```bash
|
||||
cp env.example .env
|
||||
```
|
||||
|
||||
Keys to provide:
|
||||
|
||||
- GOOGLE_API_KEY
|
||||
- CARTESIA_API_KEY
|
||||
- DEEPGRAM_API_KEY
|
||||
- DAILY_API_KEY
|
||||
- FAL_SMART_TURN_API_KEY
|
||||
|
||||
4. Run the server:
|
||||
|
||||
```bash
|
||||
LOCAL=1 python server.py
|
||||
```
|
||||
|
||||
### Run the client
|
||||
|
||||
1. Open a new terminal and navigate to the client directory:
|
||||
|
||||
```bash
|
||||
cd client
|
||||
```
|
||||
|
||||
2. Install dependencies:
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
3. Create your .env.local file:
|
||||
|
||||
```bash
|
||||
cp env.local.example .env.local
|
||||
```
|
||||
|
||||
> Note: No keys need to be modified. `NEXT_PUBLIC_API_BASE_URL` is already configured for local use.
|
||||
|
||||
4. Start the development server:
|
||||
|
||||
```bash
|
||||
npm run dev
|
||||
```
|
||||
|
||||
5. Open [http://localhost:3000](http://localhost:3000) in your browser.
|
||||
|
||||
## Deploy the app
|
||||
|
||||
### Deploy the server to Pipecat Cloud
|
||||
|
||||
1. Navigate to the server directory:
|
||||
|
||||
```bash
|
||||
cd server
|
||||
```
|
||||
|
||||
2. You should already have a .env set up from running locally. If not, do that now.
|
||||
|
||||
3. Update your build and deploy scripts.
|
||||
|
||||
- In build.sh, set `DOCKER_USERNAME` and `AGENT_NAME`.
|
||||
- In pcc-deploy.toml, set `image`, which specifies where your Docker image is stored.
|
||||
|
||||
4. Build your Docker image by running the build script:
|
||||
|
||||
```bash
|
||||
./build.sh
|
||||
```
|
||||
|
||||
> Note: This builds, tags and pushes your docker image and assumes Docker Hub is the container registry.
|
||||
|
||||
5. Make sure you have the Pipecat Cloud CLI installed:
|
||||
|
||||
```bash
|
||||
pip install pipecatcloud
|
||||
```
|
||||
|
||||
6. Login via the Pipecat Cloud CLI:
|
||||
|
||||
```bash
|
||||
pcc auth login
|
||||
```
|
||||
|
||||
> Note: If you don't have an account, sign up at https://pipecat.daily.co.
|
||||
|
||||
7. Add a secrets set:
|
||||
|
||||
```bash
|
||||
pcc secrets set pcc-smart-turn-secrets --file .env
|
||||
```
|
||||
|
||||
8. Deploy your agent:
|
||||
|
||||
```bash
|
||||
pcc deploy
|
||||
```
|
||||
|
||||
> Note: This uses your pcc-deploy.toml settings. Modify as needed.
|
||||
|
||||
### Deploy the client to Vercel
|
||||
|
||||
This project uses TypeScript, React, and Next.js, making it a perfect fit for [Vercel](https://vercel.com/).
|
||||
|
||||
- In your client directory, install Vercel's CLI tool: `npm install -g vercel`
|
||||
- Verify it's installed using `vercel --version`
|
||||
- Log in to your Vercel account using `vercel login`
|
||||
- Deploy your client to Vercel using `vercel`
|
||||
|
||||
Follow the Vercel prompts to deploy your project.
|
||||
|
||||
### Test your deployed app
|
||||
|
||||
Now with the client and server deployed, you can join the call using your Vercel URL.
|
||||
|
||||
See the debug information for the Smart Turn data. It prints a log line for each smart-turn inference:
|
||||
|
||||
```
|
||||
Smart Turn: COMPLETE, Probability: 95.3%, Model inference: 65.23ms, Server processing: 82.09ms, End-to-end: 245.43ms
|
||||
```
|
||||
41
examples/fal-smart-turn/client/.gitignore
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
|
||||
|
||||
# dependencies
|
||||
/node_modules
|
||||
/.pnp
|
||||
.pnp.*
|
||||
.yarn/*
|
||||
!.yarn/patches
|
||||
!.yarn/plugins
|
||||
!.yarn/releases
|
||||
!.yarn/versions
|
||||
|
||||
# testing
|
||||
/coverage
|
||||
|
||||
# next.js
|
||||
/.next/
|
||||
/out/
|
||||
|
||||
# production
|
||||
/build
|
||||
|
||||
# misc
|
||||
.DS_Store
|
||||
*.pem
|
||||
|
||||
# debug
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# env files (can opt-in for committing if needed)
|
||||
.env*
|
||||
|
||||
# vercel
|
||||
.vercel
|
||||
|
||||
# typescript
|
||||
*.tsbuildinfo
|
||||
next-env.d.ts
|
||||
3
examples/fal-smart-turn/client/env.local.example
Normal file
@@ -0,0 +1,3 @@
|
||||
NEXT_PUBLIC_API_BASE_URL=http://localhost:7860
|
||||
PIPECAT_CLOUD_API_KEY=
|
||||
AGENT_NAME=pcc-smart-turn
|
||||
16
examples/fal-smart-turn/client/eslint.config.mjs
Normal file
@@ -0,0 +1,16 @@
|
||||
import { dirname } from "path";
|
||||
import { fileURLToPath } from "url";
|
||||
import { FlatCompat } from "@eslint/eslintrc";
|
||||
|
||||
// Directory that contains this config file, derived from the module URL.
const configDirectory = dirname(fileURLToPath(import.meta.url));

// Shareable configs are resolved relative to this file's directory.
const compat = new FlatCompat({ baseDirectory: configDirectory });

export default [...compat.extends("next/core-web-vitals", "next/typescript")];
|
||||
7
examples/fal-smart-turn/client/next.config.ts
Normal file
@@ -0,0 +1,7 @@
|
||||
import type { NextConfig } from "next";
|
||||
|
||||
const nextConfig: NextConfig = {
|
||||
/* config options here */
|
||||
};
|
||||
|
||||
export default nextConfig;
|
||||
5174
examples/fal-smart-turn/client/package-lock.json
generated
Normal file
28
examples/fal-smart-turn/client/package.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"name": "my-nextjs-app",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
"build": "next build",
|
||||
"start": "next start",
|
||||
"lint": "next lint"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^0.3.5",
|
||||
"@pipecat-ai/client-react": "^0.3.5",
|
||||
"@pipecat-ai/daily-transport": "^0.3.10",
|
||||
"next": "15.3.1",
|
||||
"react": "^19.0.0",
|
||||
"react-dom": "^19.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@eslint/eslintrc": "^3",
|
||||
"@types/node": "^20",
|
||||
"@types/react": "^19",
|
||||
"@types/react-dom": "^19",
|
||||
"eslint": "^9",
|
||||
"eslint-config-next": "15.2.3",
|
||||
"typescript": "^5"
|
||||
}
|
||||
}
|
||||
7
examples/fal-smart-turn/client/public/favicon.svg
Normal file
@@ -0,0 +1,7 @@
|
||||
<svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M3.3088 5.05615C3.64682 4.92779 4.02833 5.02411 4.26653 5.29797L7.36884 8.86461H16.6312L19.7335 5.29797C19.9717 5.02411 20.3532 4.92779 20.6912 5.05615C21.0292 5.18452 21.253 5.51072 21.253 5.87504V13.75H24V15.5H19.5181V8.19909L17.6762 10.3167C17.5115 10.506 17.2738 10.6146 17.0241 10.6146H6.9759C6.72616 10.6146 6.48854 10.506 6.32383 10.3167L4.48193 8.19909V15.5H0V13.75H2.74699V5.87504C2.74699 5.51072 2.97078 5.18452 3.3088 5.05615Z" fill="black"/>
|
||||
<path d="M19.5181 17.25H24V19H19.5181V17.25Z" fill="black"/>
|
||||
<path d="M0 17.25H4.48193V19H0V17.25Z" fill="black"/>
|
||||
<path d="M9.25301 14.3333C9.25301 14.9777 8.73517 15.5 8.09639 15.5C7.4576 15.5 6.93976 14.9777 6.93976 14.3333C6.93976 13.689 7.4576 13.1667 8.09639 13.1667C8.73517 13.1667 9.25301 13.689 9.25301 14.3333Z" fill="black"/>
|
||||
<path d="M17.0602 14.3333C17.0602 14.9777 16.5424 15.5 15.9036 15.5C15.2648 15.5 14.747 14.9777 14.747 14.3333C14.747 13.689 15.2648 13.1667 15.9036 13.1667C16.5424 13.1667 17.0602 13.689 17.0602 14.3333Z" fill="black"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 1.1 KiB |
44
examples/fal-smart-turn/client/src/app/api/connect/route.ts
Normal file
@@ -0,0 +1,44 @@
|
||||
import { NextResponse, NextRequest } from 'next/server';
|
||||
|
||||
/**
 * POST handler for the connect route.
 *
 * Calls the Pipecat Cloud public "start" endpoint for the agent named by the
 * AGENT_NAME environment variable, asking it to create a Daily room, and
 * returns that room's URL and token to the caller.
 *
 * Request body (optional JSON object): `{ MY_CUSTOM_DATA }`. The value, if
 * present, is forwarded to the bot unchanged. An empty body is accepted and
 * simply forwards no custom data.
 *
 * Responses:
 *   - 200 `{ room_url, token }` on success.
 *   - 400 `{ error }` when a body is supplied but is not valid JSON.
 *   - 500 `{ error }` when required environment variables are missing or the
 *     upstream call fails / answers with a non-2xx status.
 *
 * @param request Incoming request whose optional JSON body may carry
 *                `MY_CUSTOM_DATA`.
 * @returns A JSON response as described above.
 */
export async function POST(request: NextRequest) {
  const agentName = process.env.AGENT_NAME;
  const apiKey = process.env.PIPECAT_CLOUD_API_KEY;

  // Fail fast on missing configuration instead of calling
  // ".../public/undefined/start" with "Bearer undefined".
  if (!agentName || !apiKey) {
    console.error(
      'API error: AGENT_NAME and PIPECAT_CLOUD_API_KEY must be set'
    );
    return NextResponse.json(
      { error: 'Failed to start agent' },
      { status: 500 }
    );
  }

  // Read the body as text first so an empty body can be told apart from a
  // malformed one; request.json() throws on both.
  let MY_CUSTOM_DATA: unknown;
  try {
    const rawBody = await request.text();
    if (rawBody.trim() !== '') {
      // Optional chaining keeps a literal `null` body from throwing.
      MY_CUSTOM_DATA = JSON.parse(rawBody)?.MY_CUSTOM_DATA;
    }
  } catch (error) {
    console.error('API error:', error);
    return NextResponse.json(
      { error: 'Invalid JSON request body' },
      { status: 400 }
    );
  }

  try {
    const response = await fetch(
      `https://api.pipecat.daily.co/v1/public/${encodeURIComponent(agentName)}/start`,
      {
        method: 'POST',
        headers: {
          Authorization: `Bearer ${apiKey}`,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          // Create Daily room
          createDailyRoom: true,
          // Optionally set Daily room properties
          dailyRoomProperties: { start_video_off: true },
          // Optionally pass custom data to the bot
          body: { MY_CUSTOM_DATA },
        }),
      }
    );

    if (!response.ok) {
      throw new Error(`API responded with status: ${response.status}`);
    }

    const data = await response.json();

    // Transform the response to match what RTVI client expects
    return NextResponse.json({
      room_url: data.dailyRoom,
      token: data.dailyToken,
    });
  } catch (error) {
    console.error('API error:', error);
    return NextResponse.json(
      { error: 'Failed to start agent' },
      { status: 500 }
    );
  }
}
|
||||
82
examples/fal-smart-turn/client/src/app/globals.css
Normal file
@@ -0,0 +1,82 @@
|
||||
/* ---------- Page ---------- */

body {
  margin: 0;
  padding: 20px;
  font-family: Arial, sans-serif;
  background-color: #f0f0f0;
}

.app {
  max-width: 1200px;
  margin: 0 auto;
}

/* ---------- Status bar and its controls ---------- */

.status-bar {
  display: flex;
  justify-content: space-between;
  align-items: center;
  padding: 10px;
  background-color: #fff;
  border-radius: 8px;
  margin-bottom: 20px;
}

.controls button {
  padding: 8px 16px;
  margin-left: 10px;
  border: none;
  border-radius: 4px;
  cursor: pointer;
}

button:disabled {
  opacity: 0.5;
  cursor: not-allowed;
}

/* Green for connect, red for disconnect. */
.connect-btn {
  background-color: #4caf50;
  color: white;
}

.disconnect-btn {
  background-color: #f44336;
  color: white;
}

/* ---------- Main content ---------- */

.main-content {
  background-color: #fff;
  border-radius: 8px;
  padding: 20px;
  margin-bottom: 20px;
}

.bot-container {
  display: flex;
  flex-direction: column;
  align-items: center;
}

/* Fixed 640x360 frame; the video is scaled to fill it and cropped. */
.video-container {
  width: 640px;
  height: 360px;
  background-color: #ddd;
  margin-bottom: 20px;
  border-radius: 8px;
  overflow: hidden;
}

.video-container video {
  width: 100%;
  height: 100%;
  object-fit: cover;
}

/* ---------- Microphone state ---------- */

/* Same green / red pair as the connect / disconnect buttons. */
.mic-enabled {
  background-color: #4caf50;
  color: white;
}

.mic-disabled {
  background-color: #f44336;
  color: white;
}
|
||||
27
examples/fal-smart-turn/client/src/app/layout.tsx
Normal file
@@ -0,0 +1,27 @@
|
||||
import './globals.css';
|
||||
import { RTVIProvider } from '@/providers/RTVIProvider';
|
||||
|
||||
// Favicon descriptor: the SVG served from the public folder.
const faviconIcon = { url: '/favicon.svg', type: 'image/svg+xml' };

// Page-level metadata exported from the root layout: document title,
// description, and the icon list.
export const metadata = {
  title: 'Pipecat React Client',
  description: 'Pipecat RTVI Client using Next.js',
  icons: { icon: [faviconIcon] },
};
|
||||
|
||||
/**
 * Root layout for the app: renders the <html>/<body> shell and wraps every
 * page in RTVIProvider.
 *
 * The favicon is intentionally not declared here. It is already listed in
 * this file's exported `metadata.icons`, so a hand-written
 * <link rel="icon"> in <head> would declare the same icon a second time.
 *
 * @param children The page (or nested layout) to render inside the provider.
 */
export default function RootLayout({
  children,
}: {
  children: React.ReactNode;
}) {
  return (
    <html lang="en">
      <body>
        <RTVIProvider>{children}</RTVIProvider>
      </body>
    </html>
  );
}
|
||||