Compare commits
812 Commits
v0.0.80
...
hush/usage
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cb6e86e69f | ||
|
|
fce6f55ddb | ||
|
|
d9580f72a9 | ||
|
|
0588c82bbf | ||
|
|
16e9093d5a | ||
|
|
91a5d580fd | ||
|
|
0473556992 | ||
|
|
fdaa4e476e | ||
|
|
502e7e42a7 | ||
|
|
2ab3d4fb42 | ||
|
|
55014bdd77 | ||
|
|
334796bd65 | ||
|
|
1c25b6fb72 | ||
|
|
91b29de7ca | ||
|
|
21d610cd30 | ||
|
|
f7fe673ad1 | ||
|
|
4b415721e2 | ||
|
|
8d2a98e0e7 | ||
|
|
523e890c8c | ||
|
|
3c748fe772 | ||
|
|
d293cee372 | ||
|
|
8b62a96878 | ||
|
|
0c102ce70b | ||
|
|
3894d2a4b9 | ||
|
|
1f6b61c0db | ||
|
|
8ee28b37cd | ||
|
|
e85e7e4d84 | ||
|
|
1b3afb5511 | ||
|
|
7cec013666 | ||
|
|
86127167fb | ||
|
|
9935a68018 | ||
|
|
5679dde70f | ||
|
|
d81b0f6368 | ||
|
|
9698b008da | ||
|
|
7b05c9283b | ||
|
|
303dd2ec35 | ||
|
|
aa6e81648a | ||
|
|
1a87870ef3 | ||
|
|
aac4ce2d12 | ||
|
|
2a79b2c853 | ||
|
|
15bf5b1533 | ||
|
|
cdc86db8ce | ||
|
|
9d2ad750b5 | ||
|
|
19ceb1a48f | ||
|
|
59217eae38 | ||
|
|
bea0aee835 | ||
|
|
aeace9b9be | ||
|
|
2994640f47 | ||
|
|
10069719e4 | ||
|
|
046b76df60 | ||
|
|
f2d9063984 | ||
|
|
99f008e927 | ||
|
|
2699f0c2a6 | ||
|
|
0b6dd98000 | ||
|
|
a14fb20d15 | ||
|
|
728361a6a7 | ||
|
|
106db69e8e | ||
|
|
cf90071926 | ||
|
|
deaeb75a1f | ||
|
|
a666327d70 | ||
|
|
13a0522546 | ||
|
|
7da37a0d1f | ||
|
|
7efb22a323 | ||
|
|
8084e2f909 | ||
|
|
86127c6a6e | ||
|
|
402e019ae2 | ||
|
|
f09e4e238b | ||
|
|
2921162b3b | ||
|
|
ac1582c906 | ||
|
|
e4b01a5844 | ||
|
|
fa663abbbc | ||
|
|
d19e6111c3 | ||
|
|
8a6d504a7e | ||
|
|
43915937f2 | ||
|
|
48e92a22fe | ||
|
|
566af6b0b8 | ||
|
|
12e7613d5f | ||
|
|
04a68f2c57 | ||
|
|
9b4ca12f49 | ||
|
|
453ce715a6 | ||
|
|
d87b6189ba | ||
|
|
8293347b77 | ||
|
|
c85a3f0b94 | ||
|
|
233fb25e6c | ||
|
|
080978daa6 | ||
|
|
62b7c3d3b2 | ||
|
|
4b2379cba8 | ||
|
|
92087bdfa8 | ||
|
|
617919ac09 | ||
|
|
0669daec3d | ||
|
|
7c15a8c800 | ||
|
|
066b77fba0 | ||
|
|
d9aef5f916 | ||
|
|
91ae3f8a9b | ||
|
|
36da623352 | ||
|
|
31b9087ea6 | ||
|
|
1851fed22e | ||
|
|
eddce460da | ||
|
|
da4f30cb6d | ||
|
|
250cf2d8f1 | ||
|
|
7bbdb4f991 | ||
|
|
051c4782fb | ||
|
|
b1ccec74b2 | ||
|
|
92bf0d9eda | ||
|
|
f985550441 | ||
|
|
de8ee96927 | ||
|
|
2576d0f340 | ||
|
|
f38f4711ac | ||
|
|
c2f3ddd329 | ||
|
|
73ffe96228 | ||
|
|
bd13a80da7 | ||
|
|
312959f97e | ||
|
|
fe168e3c68 | ||
|
|
28929a47f7 | ||
|
|
03f5defbc3 | ||
|
|
b216648315 | ||
|
|
084b133a01 | ||
|
|
e589876176 | ||
|
|
a826313bf9 | ||
|
|
49f44aa7c8 | ||
|
|
64ceef9cf0 | ||
|
|
cd6567c1f1 | ||
|
|
ac67ca1555 | ||
|
|
8d38994756 | ||
|
|
607e3040d4 | ||
|
|
60604a9449 | ||
|
|
4abe4a6253 | ||
|
|
4c054af17b | ||
|
|
dcba940d42 | ||
|
|
ad2adb0c58 | ||
|
|
76923010b5 | ||
|
|
1b511557b2 | ||
|
|
fdadb12933 | ||
|
|
f1bbb7ba22 | ||
|
|
c1492c5275 | ||
|
|
4ffdabcfde | ||
|
|
b489de2fc3 | ||
|
|
d9656cbb1a | ||
|
|
05fb223985 | ||
|
|
62a5f07ad2 | ||
|
|
b669e3a481 | ||
|
|
99f1041a47 | ||
|
|
37b1345bfa | ||
|
|
8994ac17eb | ||
|
|
63bc825008 | ||
|
|
e7ffde1c4c | ||
|
|
1c88565725 | ||
|
|
07a6c2fb0e | ||
|
|
e99f3bf75a | ||
|
|
f09d780413 | ||
|
|
e370d23374 | ||
|
|
b68ec14146 | ||
|
|
c567fd71b1 | ||
|
|
2ca1b2d6f8 | ||
|
|
04041a9a9a | ||
|
|
6c498dc70f | ||
|
|
32b07c1720 | ||
|
|
ad507ce23d | ||
|
|
be562cedfc | ||
|
|
089e703e1f | ||
|
|
4dc1e15a99 | ||
|
|
c7dc2e886f | ||
|
|
11bc4ea854 | ||
|
|
029d76033d | ||
|
|
924d7dea9a | ||
|
|
244e94f3ce | ||
|
|
af1f51d49e | ||
|
|
9ba3c168b8 | ||
|
|
e6ee8f7a16 | ||
|
|
2ea2bd99e0 | ||
|
|
0c2ced7c52 | ||
|
|
fb160646b8 | ||
|
|
89fed57af2 | ||
|
|
feae3b6d2d | ||
|
|
92d3be8975 | ||
|
|
0f53e1db2c | ||
|
|
d398e8cc10 | ||
|
|
e5f263d380 | ||
|
|
3a4c303c54 | ||
|
|
54a1ef47d0 | ||
|
|
149ffa4f3c | ||
|
|
e5465034d9 | ||
|
|
568c7c782d | ||
|
|
9851334221 | ||
|
|
e79c4fc99d | ||
|
|
55c321f4ff | ||
|
|
a14a53a005 | ||
|
|
a71f937e8f | ||
|
|
032032df65 | ||
|
|
d0178edad0 | ||
|
|
795c5e55d9 | ||
|
|
8f8d8ae0d8 | ||
|
|
741f192d04 | ||
|
|
a5595b82ea | ||
|
|
4d1915eb41 | ||
|
|
b3a84fc772 | ||
|
|
403d22e62c | ||
|
|
ee00ee5c57 | ||
|
|
f53fd880dc | ||
|
|
de3461e4cc | ||
|
|
7bafc3a1bb | ||
|
|
22ef61fe8d | ||
|
|
7078fb53bd | ||
|
|
33447ad6f2 | ||
|
|
6faa50ae5b | ||
|
|
3797f41c8c | ||
|
|
ff919b8c15 | ||
|
|
cb048d6c7e | ||
|
|
6c2c43ade0 | ||
|
|
f899c15b03 | ||
|
|
d10ef08775 | ||
|
|
27a5af6fa1 | ||
|
|
4bff0a7c49 | ||
|
|
508f7d203d | ||
|
|
0f87d5342c | ||
|
|
f6164e3bde | ||
|
|
1a0fb55d0f | ||
|
|
6d0beef944 | ||
|
|
b9fd6b873b | ||
|
|
dea0f1791f | ||
|
|
da66c38795 | ||
|
|
912f8b96f0 | ||
|
|
f9eb447d82 | ||
|
|
65f5fe8588 | ||
|
|
817c77f3fe | ||
|
|
8896179b00 | ||
|
|
463752360b | ||
|
|
66b7977a62 | ||
|
|
468de68aec | ||
|
|
c4762c1a92 | ||
|
|
7f4d3a2f02 | ||
|
|
88614b312f | ||
|
|
5b4655f45a | ||
|
|
d7c8f8df53 | ||
|
|
2571cb2e69 | ||
|
|
15782be27c | ||
|
|
997e4b66c6 | ||
|
|
6ccbfd9b57 | ||
|
|
677f69971c | ||
|
|
678dd22b8e | ||
|
|
0bba02028d | ||
|
|
620b1f785c | ||
|
|
780e91eb91 | ||
|
|
667569ef47 | ||
|
|
17ea0afa6f | ||
|
|
3fc5214c15 | ||
|
|
1636c48ab9 | ||
|
|
c3a2fa100c | ||
|
|
8649368337 | ||
|
|
781366627c | ||
|
|
f6b4db42ef | ||
|
|
ed64716219 | ||
|
|
5c22b2e1de | ||
|
|
d4b1e1ab41 | ||
|
|
fafe0cc4a3 | ||
|
|
40c82a8530 | ||
|
|
98d3686861 | ||
|
|
88337fc21f | ||
|
|
928c0ef1b4 | ||
|
|
1f005e7075 | ||
|
|
2cee6229ae | ||
|
|
10e9371f49 | ||
|
|
e21ab89509 | ||
|
|
cbce2075eb | ||
|
|
97868175e6 | ||
|
|
99731ca40a | ||
|
|
f96cbcce22 | ||
|
|
a19b9f70c0 | ||
|
|
9b4f1bdf39 | ||
|
|
6b2bf8de64 | ||
|
|
33481c6614 | ||
|
|
a5776b20ad | ||
|
|
e286e015cf | ||
|
|
a7bfac8d68 | ||
|
|
1647b5b665 | ||
|
|
eaecefe675 | ||
|
|
7c569b3863 | ||
|
|
8bf6a4c66f | ||
|
|
1df3660186 | ||
|
|
75c0b089e0 | ||
|
|
d8f3d4dd32 | ||
|
|
c5e53bb84f | ||
|
|
b04e494373 | ||
|
|
392293d55f | ||
|
|
272532a3ea | ||
|
|
3d04f565ec | ||
|
|
d0477edb6a | ||
|
|
326bfe4239 | ||
|
|
3cb78d839d | ||
|
|
9129e44c05 | ||
|
|
ec664e2d33 | ||
|
|
3d88b42e0b | ||
|
|
2289409b4c | ||
|
|
b1551b0d6b | ||
|
|
7cb5c951f4 | ||
|
|
a2cb5ab8e1 | ||
|
|
cad3104d56 | ||
|
|
d8a2a917a2 | ||
|
|
3984cb58a2 | ||
|
|
9027a96a07 | ||
|
|
6abac3e3e5 | ||
|
|
077b949bb2 | ||
|
|
3a6c9786e8 | ||
|
|
492da16cc9 | ||
|
|
a698c4064b | ||
|
|
9e098b5f79 | ||
|
|
7df7395dd1 | ||
|
|
0885bc9cdf | ||
|
|
889dc19a27 | ||
|
|
4bc41466b7 | ||
|
|
9ab8ddee79 | ||
|
|
0204f6a95d | ||
|
|
b0bf653f04 | ||
|
|
e8a676eb36 | ||
|
|
ca96eef1f3 | ||
|
|
8e1637d6c7 | ||
|
|
367200c0ad | ||
|
|
766e1948a6 | ||
|
|
f369683b8b | ||
|
|
461025d1cc | ||
|
|
ac88706f38 | ||
|
|
93a89449b8 | ||
|
|
199bf72945 | ||
|
|
d20e4125f6 | ||
|
|
c1baed642e | ||
|
|
33ef68573f | ||
|
|
3c1b41df13 | ||
|
|
58f70e7e0d | ||
|
|
fca4ecc73c | ||
|
|
d0b573e44f | ||
|
|
cfa333508b | ||
|
|
9e7260393a | ||
|
|
073b585c52 | ||
|
|
81c2e51bec | ||
|
|
42344125b1 | ||
|
|
db5bcfaa51 | ||
|
|
615239b7d2 | ||
|
|
27f1e9dd69 | ||
|
|
bd760deff2 | ||
|
|
8bc3c89140 | ||
|
|
2cd2567a37 | ||
|
|
5b55988846 | ||
|
|
a12392182c | ||
|
|
b814b70e1e | ||
|
|
a1f84e1b50 | ||
|
|
0839b48da8 | ||
|
|
de51637b77 | ||
|
|
e1b1dc16ec | ||
|
|
1fe27eb0a2 | ||
|
|
d7e1389497 | ||
|
|
8c7230aa8f | ||
|
|
2cf71239b0 | ||
|
|
ec2c62e32b | ||
|
|
38ce85e9a0 | ||
|
|
2279e5a899 | ||
|
|
cce6eb5d87 | ||
|
|
c2b98ae557 | ||
|
|
727eb12b16 | ||
|
|
ba96bd05d3 | ||
|
|
8ead309f8d | ||
|
|
fad0e55c64 | ||
|
|
74b1af56a0 | ||
|
|
6924850ec4 | ||
|
|
dfe7815dc5 | ||
|
|
69f0a75882 | ||
|
|
cca90791c4 | ||
|
|
f2a5d408de | ||
|
|
044c6eba46 | ||
|
|
db71089f5e | ||
|
|
f861f5066f | ||
|
|
81cede2c60 | ||
|
|
7603203230 | ||
|
|
8569b61598 | ||
|
|
fe42187dc1 | ||
|
|
999e88c942 | ||
|
|
c04df2f28b | ||
|
|
100ef0ab5c | ||
|
|
42886d7105 | ||
|
|
22cbba002a | ||
|
|
0a043154f2 | ||
|
|
5e322eba9e | ||
|
|
11d0c3d46d | ||
|
|
c873798ce5 | ||
|
|
95f72f6dce | ||
|
|
d8cd28bb8b | ||
|
|
c2df6c8aee | ||
|
|
82478be861 | ||
|
|
0f2b7bc01b | ||
|
|
1b2a5df017 | ||
|
|
2f496ac74f | ||
|
|
22633a63b0 | ||
|
|
e5ed0424e4 | ||
|
|
786387722a | ||
|
|
9f82c6b4a4 | ||
|
|
99cfcb1d4e | ||
|
|
d595676436 | ||
|
|
0190812ee8 | ||
|
|
2a24061bbb | ||
|
|
89f7e7d199 | ||
|
|
384814e640 | ||
|
|
ab4364b833 | ||
|
|
fafdadad3c | ||
|
|
05dc2fa916 | ||
|
|
0c30cc6ea6 | ||
|
|
c26d336e34 | ||
|
|
37b6198787 | ||
|
|
3c271da94c | ||
|
|
be28d3f93b | ||
|
|
d2f210e960 | ||
|
|
57add41971 | ||
|
|
74b38b59d6 | ||
|
|
dac58deffc | ||
|
|
aff11f5121 | ||
|
|
a4023d3915 | ||
|
|
d6543d244d | ||
|
|
fafcd79870 | ||
|
|
6a717fbbd1 | ||
|
|
9b3f6927c2 | ||
|
|
0b21f8a6bd | ||
|
|
8249b014f0 | ||
|
|
9d9f10ae0e | ||
|
|
e27b23694d | ||
|
|
66ce5fe6bd | ||
|
|
a9b53dc800 | ||
|
|
818352a300 | ||
|
|
3e9fc7be19 | ||
|
|
a2e76bcad8 | ||
|
|
8e8e42717b | ||
|
|
b31322e38e | ||
|
|
305108be9a | ||
|
|
908325484d | ||
|
|
dd6ff789c7 | ||
|
|
f4938e0fad | ||
|
|
2e1f397d17 | ||
|
|
e8f60c7c6f | ||
|
|
fedb8a201f | ||
|
|
8ccd220a60 | ||
|
|
fe79de8f27 | ||
|
|
176573c342 | ||
|
|
75f9914f49 | ||
|
|
f4d6715e32 | ||
|
|
38f6e33f97 | ||
|
|
1c3e4e34e5 | ||
|
|
623c660027 | ||
|
|
a3e65ab3b5 | ||
|
|
f3a4b416df | ||
|
|
aa471a4ef5 | ||
|
|
d55133a44f | ||
|
|
0f1cf81691 | ||
|
|
ac4d335799 | ||
|
|
e65385c151 | ||
|
|
0bb7df7a6b | ||
|
|
daee1ddf3b | ||
|
|
d1f72c1c0b | ||
|
|
1cccb97ccf | ||
|
|
d7794abf21 | ||
|
|
6a6a63a532 | ||
|
|
6edb6fed41 | ||
|
|
a537382816 | ||
|
|
46deaada70 | ||
|
|
7366b1aee0 | ||
|
|
dbc52bc6b0 | ||
|
|
d6432589f6 | ||
|
|
13b73d4406 | ||
|
|
85d8282f7e | ||
|
|
070690ec64 | ||
|
|
b9c96fd623 | ||
|
|
f8b2ab6331 | ||
|
|
ea3f7e3c34 | ||
|
|
2f44f88b08 | ||
|
|
25747a001b | ||
|
|
fbe4338440 | ||
|
|
64b4c65728 | ||
|
|
29442969a9 | ||
|
|
dc2e1d4ad3 | ||
|
|
5477dfcbea | ||
|
|
516f0e08ab | ||
|
|
246f9f3325 | ||
|
|
4699ee8d86 | ||
|
|
3d850e8cc5 | ||
|
|
6e734a37f9 | ||
|
|
f72ca2fd7d | ||
|
|
0826d72f74 | ||
|
|
ba5ebfa0ec | ||
|
|
dc3412b2df | ||
|
|
b2e9fd9341 | ||
|
|
c11b207c97 | ||
|
|
d6205027cf | ||
|
|
986160c077 | ||
|
|
b56ff86fee | ||
|
|
5c574eaad9 | ||
|
|
2df231143a | ||
|
|
e3597801d4 | ||
|
|
65298ab792 | ||
|
|
b609b02614 | ||
|
|
f2b50c14d2 | ||
|
|
ee3b023986 | ||
|
|
0d9e1190d7 | ||
|
|
595a7c7fbe | ||
|
|
586586f743 | ||
|
|
a1c6ad539d | ||
|
|
daf7fed8b3 | ||
|
|
446d99d194 | ||
|
|
cbdbdee4c0 | ||
|
|
a26647c433 | ||
|
|
0fab56fc13 | ||
|
|
f0baff94b2 | ||
|
|
d146170fd6 | ||
|
|
001a2d36e5 | ||
|
|
99e237b1e2 | ||
|
|
978f644f19 | ||
|
|
5a4c6b9618 | ||
|
|
977a57c8fb | ||
|
|
c64bc5a636 | ||
|
|
eba006d39c | ||
|
|
a001f6f193 | ||
|
|
09d6ec1098 | ||
|
|
f56be9315a | ||
|
|
8e5880b2e7 | ||
|
|
d8ac6f2c1a | ||
|
|
052ffe8712 | ||
|
|
b52296450c | ||
|
|
c71cec04d3 | ||
|
|
83f64ecd3b | ||
|
|
d19170d8b1 | ||
|
|
8b95d74193 | ||
|
|
3c4694a8f1 | ||
|
|
b9748b1228 | ||
|
|
def1cf1548 | ||
|
|
9b216116f1 | ||
|
|
7cb372ebb9 | ||
|
|
6838bc1e51 | ||
|
|
e04f42167e | ||
|
|
91a3f63e28 | ||
|
|
b24eb76559 | ||
|
|
d9ea02595b | ||
|
|
5bc0e49baa | ||
|
|
ec138b97d9 | ||
|
|
0c32cc29a7 | ||
|
|
d740bab99e | ||
|
|
ac62183eb6 | ||
|
|
34f823bcac | ||
|
|
b4e1051066 | ||
|
|
d8882bc381 | ||
|
|
da18d0a562 | ||
|
|
f8e13a82cf | ||
|
|
2b00d37e94 | ||
|
|
2dbd17da4d | ||
|
|
d45fbd5455 | ||
|
|
b22bdff6d0 | ||
|
|
2b286365e0 | ||
|
|
0a3e98857e | ||
|
|
aeb9f1ffca | ||
|
|
7f1100bd4c | ||
|
|
8fbd9b5af7 | ||
|
|
49c1f0bd08 | ||
|
|
ce7a0512f9 | ||
|
|
fdcd14dd21 | ||
|
|
0386599163 | ||
|
|
c1ce3d7d2b | ||
|
|
8ecece2d9c | ||
|
|
0d8ab7abca | ||
|
|
dea7c22020 | ||
|
|
cfe11267f4 | ||
|
|
d0c97d3602 | ||
|
|
37e1551abc | ||
|
|
e1477e79f0 | ||
|
|
547b126d98 | ||
|
|
447e3b28eb | ||
|
|
472efa2971 | ||
|
|
64486ef50b | ||
|
|
5f801743d0 | ||
|
|
802c5d04f4 | ||
|
|
83b90da53a | ||
|
|
1f49de5cdf | ||
|
|
2ee481d541 | ||
|
|
7cf099eae7 | ||
|
|
93a8ea3cb2 | ||
|
|
776aafddfb | ||
|
|
d56762262a | ||
|
|
bbcf35d657 | ||
|
|
972546b24f | ||
|
|
8b351f5bec | ||
|
|
bd7d9346b7 | ||
|
|
81325be4f3 | ||
|
|
399f8de6ef | ||
|
|
60c070e077 | ||
|
|
e3f2faabf7 | ||
|
|
b5a644dd6f | ||
|
|
e06bd6049e | ||
|
|
25b595e125 | ||
|
|
edc8cc1e69 | ||
|
|
633dd69dee | ||
|
|
1a1d5a1081 | ||
|
|
c1b8d2acab | ||
|
|
ea368e4c5f | ||
|
|
f03deb6ecc | ||
|
|
0e01ac8ef6 | ||
|
|
5787743ab3 | ||
|
|
79be0695dd | ||
|
|
a5c5e069ba | ||
|
|
77c34076f7 | ||
|
|
d67cece356 | ||
|
|
275c8b59c5 | ||
|
|
5ebcea2a3b | ||
|
|
64f2135ddc | ||
|
|
a74231f036 | ||
|
|
189749b579 | ||
|
|
e384ca949e | ||
|
|
eb248fedc1 | ||
|
|
16f57be72c | ||
|
|
5803936838 | ||
|
|
d9837dd1e5 | ||
|
|
e48c9fc3e2 | ||
|
|
3c4454a33e | ||
|
|
2a0780e6ef | ||
|
|
5e121346fb | ||
|
|
2bdca8d22c | ||
|
|
1f5888bcf7 | ||
|
|
3d09f9a2af | ||
|
|
cd3563bb16 | ||
|
|
3e79ef4118 | ||
|
|
2613da1a1f | ||
|
|
41d40f9a11 | ||
|
|
74af2b6aa4 | ||
|
|
f7d9f32b0f | ||
|
|
6074af60ef | ||
|
|
7ef6893c0d | ||
|
|
cc5557e051 | ||
|
|
06f7a92c99 | ||
|
|
61a333ccae | ||
|
|
fc3d84dff7 | ||
|
|
86a37d8cea | ||
|
|
3f66acf9f1 | ||
|
|
facfaa2dd4 | ||
|
|
8250c381d1 | ||
|
|
32f9e48865 | ||
|
|
76eef837b6 | ||
|
|
c9aaa463b7 | ||
|
|
6d582e41b7 | ||
|
|
ca29f62bff | ||
|
|
0dced68c3c | ||
|
|
8ab81d289a | ||
|
|
f457d00760 | ||
|
|
f5118c4412 | ||
|
|
a79fe40162 | ||
|
|
dcb4949e20 | ||
|
|
8b543e558d | ||
|
|
8181962236 | ||
|
|
98dc891640 | ||
|
|
71de0da570 | ||
|
|
b40c8bb81d | ||
|
|
43f1b59b86 | ||
|
|
a0a2bb3aa4 | ||
|
|
04a50df3d5 | ||
|
|
8c0edffaff | ||
|
|
fe6063fdbe | ||
|
|
195146adb2 | ||
|
|
cab9e18cc9 | ||
|
|
baef688e4e | ||
|
|
f1f43fe500 | ||
|
|
73b63f8d35 | ||
|
|
0c14b33e92 | ||
|
|
09beaccaf0 | ||
|
|
40557a1aae | ||
|
|
ecc4cc4a79 | ||
|
|
37be8805f4 | ||
|
|
93c7e64995 | ||
|
|
9de2bd61a9 | ||
|
|
566af71862 | ||
|
|
12064bd6e6 | ||
|
|
a962459151 | ||
|
|
8fc76a29bc | ||
|
|
e3019261a5 | ||
|
|
fa1f6f1c51 | ||
|
|
337f00c16c | ||
|
|
d50922cdcd | ||
|
|
47f5ca6265 | ||
|
|
2eddb6ffda | ||
|
|
560a6f2247 | ||
|
|
59ecb19000 | ||
|
|
cfb094b3c8 | ||
|
|
1f7e8e001b | ||
|
|
688b136141 | ||
|
|
809c4c1bc5 | ||
|
|
81ca5e6601 | ||
|
|
ebc49d2252 | ||
|
|
ff8d158e18 | ||
|
|
37980b0854 | ||
|
|
39ebc2c9c1 | ||
|
|
ab61d09ec1 | ||
|
|
e4afc0a13c | ||
|
|
dde3d2395b | ||
|
|
30b36c3d6e | ||
|
|
de4dfc3ed4 | ||
|
|
a0128516ff | ||
|
|
db3b8c7325 | ||
|
|
9273ec0f25 | ||
|
|
8dfa1187be | ||
|
|
e17fd580c6 | ||
|
|
3e3d50a855 | ||
|
|
402661ae03 | ||
|
|
69c6a95b8a | ||
|
|
4d49210a73 | ||
|
|
5f8a22ef2f | ||
|
|
606ad0826a | ||
|
|
57028255ee | ||
|
|
87ebbab758 | ||
|
|
bd401e8d6f | ||
|
|
f0dfab23e7 | ||
|
|
fbc907c371 | ||
|
|
40b5ef485d | ||
|
|
b30af3e155 | ||
|
|
446bb5cddf | ||
|
|
1c1ee94074 | ||
|
|
ac30083b45 | ||
|
|
ce579d4266 | ||
|
|
5a07b30c7a | ||
|
|
9da33f3897 | ||
|
|
5ca82ec61e | ||
|
|
0067c7df47 | ||
|
|
ab03db5b0c | ||
|
|
238d6bf9ab | ||
|
|
90ae85bab2 | ||
|
|
29e09b2053 | ||
|
|
bad9977e8c | ||
|
|
b987579d54 | ||
|
|
40f1f4ff11 | ||
|
|
a3ad31d0f6 | ||
|
|
8044c4170d | ||
|
|
bc51e7abc6 | ||
|
|
256ecf4d71 | ||
|
|
c16969c4f5 | ||
|
|
8ef64d8c8d | ||
|
|
4947d08733 | ||
|
|
b61846534d | ||
|
|
8f01cd220a | ||
|
|
3abaaf80e0 | ||
|
|
13890fa021 | ||
|
|
802af28888 | ||
|
|
24a628c85e | ||
|
|
ddab95835b | ||
|
|
cb13f4b4cb | ||
|
|
4793277d34 | ||
|
|
28c729cc36 | ||
|
|
4d07c7b77c | ||
|
|
4ff0567025 | ||
|
|
1377dec01b | ||
|
|
42f4d73a63 | ||
|
|
f1c1ebf852 | ||
|
|
eb6d43f6cb | ||
|
|
f387776985 | ||
|
|
5286591826 | ||
|
|
6831e63ec9 | ||
|
|
12bcb7db64 | ||
|
|
1b48b1d860 | ||
|
|
d161e2767f | ||
|
|
4e3af00b6d | ||
|
|
4015aedb86 | ||
|
|
75a6ee839b | ||
|
|
13ce02c896 | ||
|
|
2fd5885dc3 | ||
|
|
d743586bfb | ||
|
|
8051017895 | ||
|
|
dc7bf98ce5 | ||
|
|
609a43a191 | ||
|
|
4fb04422d9 | ||
|
|
2f74a7e674 | ||
|
|
5205f56087 | ||
|
|
694c792af3 | ||
|
|
48b3ad8f8f | ||
|
|
406e82a842 | ||
|
|
837de5f893 | ||
|
|
10b9b1da2f | ||
|
|
7854a2ec83 | ||
|
|
ac7c69078f | ||
|
|
c9b4356ea6 | ||
|
|
b3e4421191 | ||
|
|
84058c3948 | ||
|
|
aebc781419 | ||
|
|
4160446f4c | ||
|
|
05a14af184 | ||
|
|
89d2ef2bde | ||
|
|
f550015efb | ||
|
|
8bbdc7c8d1 | ||
|
|
8fa44863fb | ||
|
|
088cb56922 | ||
|
|
a789e5feea | ||
|
|
16ca44131c | ||
|
|
418860cf26 | ||
|
|
e2fc8b3dce | ||
|
|
8b641089f8 | ||
|
|
d36ed755ce | ||
|
|
7aaf64fe55 | ||
|
|
5f52008974 | ||
|
|
d520677b23 | ||
|
|
42bd1e9d40 | ||
|
|
7f0494aa04 | ||
|
|
b7ae2989ac | ||
|
|
2b2b0f8121 | ||
|
|
5ca33a2b00 | ||
|
|
938dcb613d | ||
|
|
bc748cf9d0 | ||
|
|
3b55d16a49 | ||
|
|
d7f31e0cbd | ||
|
|
c662a2d820 | ||
|
|
2c220ca54e | ||
|
|
b5465364fa | ||
|
|
8714c9137f | ||
|
|
4c029fcfa7 | ||
|
|
5c86f8e687 | ||
|
|
ffa16dd136 |
2
.github/workflows/coverage.yaml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
version: "latest"
|
||||
|
||||
- name: Set up Python
|
||||
run: uv python install 3.10
|
||||
run: uv python install 3.12
|
||||
|
||||
- name: Install system packages
|
||||
run: |
|
||||
|
||||
22
.github/workflows/publish.yaml
vendored
@@ -5,25 +5,25 @@ on:
|
||||
inputs:
|
||||
gitref:
|
||||
type: string
|
||||
description: "what git tag to build (e.g. v0.0.74)"
|
||||
description: 'what git tag to build (e.g. v0.0.74)'
|
||||
required: true
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: "Build and upload wheels"
|
||||
name: 'Build and upload wheels'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ github.event.inputs.gitref }}
|
||||
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v3
|
||||
with:
|
||||
version: "latest"
|
||||
version: 'latest'
|
||||
- name: Set up Python
|
||||
run: uv python install 3.10
|
||||
run: uv python install 3.12
|
||||
- name: Install development dependencies
|
||||
run: uv sync --group dev
|
||||
- name: Build project
|
||||
@@ -35,9 +35,9 @@ jobs:
|
||||
path: ./dist
|
||||
|
||||
publish-to-pypi:
|
||||
name: "Publish to PyPI"
|
||||
name: 'Publish to PyPI'
|
||||
runs-on: ubuntu-latest
|
||||
needs: [ build ]
|
||||
needs: [build]
|
||||
environment:
|
||||
name: pypi
|
||||
url: https://pypi.org/p/pipecat-ai
|
||||
@@ -56,12 +56,12 @@ jobs:
|
||||
print-hash: true
|
||||
|
||||
publish-to-test-pypi:
|
||||
name: "Publish to Test PyPI"
|
||||
name: 'Publish to Test PyPI'
|
||||
runs-on: ubuntu-latest
|
||||
needs: [ build ]
|
||||
needs: [build]
|
||||
environment:
|
||||
name: testpypi
|
||||
url: https://pypi.org/p/pipecat-ai
|
||||
url: https://test.pypi.org/p/pipecat-ai
|
||||
permissions:
|
||||
id-token: write
|
||||
steps:
|
||||
@@ -70,7 +70,7 @@ jobs:
|
||||
with:
|
||||
name: wheels
|
||||
path: ./dist
|
||||
- name: Publish to PyPI
|
||||
- name: Publish to Test PyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
verbose: true
|
||||
|
||||
12
.github/workflows/publish_test.yaml
vendored
@@ -4,7 +4,7 @@ on: workflow_dispatch
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: "Build and upload wheels"
|
||||
name: 'Build and upload wheels'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
@@ -15,9 +15,9 @@ jobs:
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v3
|
||||
with:
|
||||
version: "latest"
|
||||
version: 'latest'
|
||||
- name: Set up Python
|
||||
run: uv python install 3.10
|
||||
run: uv python install 3.12
|
||||
- name: Install development dependencies
|
||||
run: uv sync --group dev
|
||||
- name: Build project
|
||||
@@ -29,12 +29,12 @@ jobs:
|
||||
path: ./dist
|
||||
|
||||
publish-to-test-pypi:
|
||||
name: "Publish to Test PyPI"
|
||||
name: 'Publish to Test PyPI'
|
||||
runs-on: ubuntu-latest
|
||||
needs: [build]
|
||||
environment:
|
||||
name: testpypi
|
||||
url: https://pypi.org/p/pipecat-ai
|
||||
url: https://test.pypi.org/p/pipecat-ai
|
||||
permissions:
|
||||
id-token: write
|
||||
steps:
|
||||
@@ -43,7 +43,7 @@ jobs:
|
||||
with:
|
||||
name: wheels
|
||||
path: ./dist
|
||||
- name: Publish to PyPI
|
||||
- name: Publish to Test PyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
verbose: true
|
||||
|
||||
11
.github/workflows/sync-quickstart.yaml
vendored
@@ -23,17 +23,12 @@ jobs:
|
||||
token: ${{ secrets.QUICKSTART_SYNC_TOKEN }}
|
||||
path: quickstart-repo
|
||||
|
||||
- name: Sync files (excluding READMEs)
|
||||
- name: Sync files (excluding uv.lock and README.md)
|
||||
run: |
|
||||
# Copy code files only, skip READMEs
|
||||
cp examples/quickstart/bot.py quickstart-repo/
|
||||
cp examples/quickstart/requirements.txt quickstart-repo/
|
||||
cp examples/quickstart/env.example quickstart-repo/
|
||||
|
||||
# Copy any other files that aren't README.md
|
||||
# Copy all files except uv.lock and README.md
|
||||
find examples/quickstart -type f \
|
||||
-not -name "README.md" \
|
||||
-not -name "*.md" \
|
||||
-not -name "uv.lock" \
|
||||
-exec cp {} quickstart-repo/ \;
|
||||
|
||||
- name: Commit and push changes
|
||||
|
||||
2
.github/workflows/tests.yaml
vendored
@@ -29,7 +29,7 @@ jobs:
|
||||
version: "latest"
|
||||
|
||||
- name: Set up Python
|
||||
run: uv python install 3.10
|
||||
run: uv python install 3.12
|
||||
|
||||
- name: Install system packages
|
||||
run: |
|
||||
|
||||
888
CHANGELOG.md
@@ -5,6 +5,884 @@ All notable changes to **Pipecat** will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
|
||||
- The runner `--folder` argument now supports downloading files from
|
||||
subdirectories.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue where `RimeHttpTTSService` and `PiperTTSService` could generate
|
||||
incorrectly 16-bit aligned audio frames, potentially leading to internal
|
||||
errors or static audio.
|
||||
|
||||
## [0.0.90] - 2025-10-10
|
||||
|
||||
### Added
|
||||
|
||||
- Added audio filter `KrispVivaFilter` using the Krisp VIVA SDK.
|
||||
|
||||
- Added `--folder` argument to the runner, allowing files saved in that folder
|
||||
to be downloaded from `http://HOST:PORT/file/FILE`.
|
||||
|
||||
- Added `GeminiLiveVertexLLMService`, for accessing Gemini Live via Google
|
||||
Vertex AI.
|
||||
|
||||
- Added some new configuration options to `GeminiLiveLLMService`:
|
||||
|
||||
- `thinking`
|
||||
- `enable_affective_dialog`
|
||||
- `proactivity`
|
||||
|
||||
Note that these new configuration options require using a newer model than
|
||||
the default, like "gemini-2.5-flash-native-audio-preview-09-2025". The last
|
||||
two require specifying `http_options=HttpOptions(api_version="v1alpha")`.
|
||||
|
||||
- Added `on_pipeline_error` event to `PipelineTask`. This event will get fired
|
||||
when an `ErrorFrame` is pushed (use `FrameProcessor.push_error()`).
|
||||
|
||||
```python
|
||||
@task.event_handler("on_pipeline_error")
|
||||
async def on_pipeline_error(task: PipelineTask, frame: ErrorFrame):
|
||||
...
|
||||
```
|
||||
|
||||
- Added a `service_tier` `InputParam` to the `BaseOpenAILLMService`. This
|
||||
parameter can influence the latency of the response. For example `"priority"`
|
||||
will result in faster completions, but in exchange for a higher price.
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated `GeminiLiveLLMService` to use the `google-genai` library rather than
|
||||
use WebSockets directly.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `LivekitFrameSerializer` is now deprecated. Use `LiveKitTransport` instead.
|
||||
|
||||
- `pipecat.service.openai_realtime` is now deprecated, use
|
||||
`pipecat.services.openai.realtime` instead or
|
||||
`pipecat.services.azure.realtime` for Azure Realtime.
|
||||
|
||||
- `pipecat.service.aws_nova_sonic` is now deprecated, use
|
||||
`pipecat.services.aws.nova_sonic` instead.
|
||||
|
||||
- `GeminiMultimodalLiveLLMService` is now deprecated, use
|
||||
`GeminiLiveLLMService`.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed a `GoogleVertexLLMService` issue that would generate an error if no
|
||||
token information was returned.
|
||||
|
||||
- `GeminiLiveLLMService` will now end gracefully (i.e. after the bot has
|
||||
finished) upon receiving an `EndFrame`.
|
||||
|
||||
- `GeminiLiveLLMService` will try to seamlessly reconnect when it loses its
|
||||
connection.
|
||||
|
||||
## [0.0.89] - 2025-10-07
|
||||
|
||||
### Fixed
|
||||
|
||||
- Reverted a change introduced in 0.0.88 that was causing pipelines to be frozen
|
||||
when using interruption strategies and processors that block interruption
|
||||
frames (e.g. `STTMuteFilter`).
|
||||
|
||||
## [0.0.88] - 2025-10-07
|
||||
|
||||
### Added
|
||||
|
||||
- Added support for Nano Banana models to `GoogleLLMService`. For example, you
|
||||
can now use the `gemini-2.5-flash-image` model to generate images.
|
||||
|
||||
- Added `HumeTTSService` for text-to-speech synthesis using Hume AI's expressive
|
||||
voice models. Provides high-quality, emotionally expressive speech synthesis
|
||||
with support for various voice models. Includes example in
|
||||
`examples/foundational/07ad-interruptible-hume.py`. Use with:
|
||||
`uv pip install pipecat-ai[hume]`.
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated default `GoogleLLMService` model to `gemini-2.5-flash`.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- PlayHT is shutting down their API on December 31st, 2025. As a result,
|
||||
`PlayHTTTSService` and `PlayHTHttpTTSService` are deprecated and will be
|
||||
removed in a future version.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue with `AWSNovaSonicLLMService` where the client wouldn't
|
||||
connect due to a breaking change in the AWS dependency chain.
|
||||
|
||||
- `PermissionError` is now caught if NLTK's `punkt_tab` can't be downloaded.
|
||||
|
||||
- Fixed an issue that would cause wrong user/assistant context ordering when
|
||||
using interruption strategies.
|
||||
|
||||
- Fixed RTVI incoming message handling, broken in 0.0.87.
|
||||
|
||||
## [0.0.87] - 2025-10-02
|
||||
|
||||
### Added
|
||||
|
||||
- Added `WebsocketSTTService` base class for websocket-based STT services.
|
||||
Combines STT functionality with websocket connectivity, providing automatic
|
||||
error handling and reconnection capabilities with exponential backoff.
|
||||
|
||||
- Added `DeepgramFluxSTTService` for real-time speech recognition using
|
||||
Deepgram's Flux WebSocket API. Flux understands conversational flow and
|
||||
automatically handles turn-taking.
|
||||
|
||||
- Added RTVI messages for user/bot audio levels and system logs.
|
||||
|
||||
- Include OpenAI-based LLM services cached tokens to `MetricsFrame`.
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated the default model for `AnthropicLLMService` to
|
||||
`claude-sonnet-4-5-20250929`.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `DailyTransportMessageFrame` and `DailyTransportMessageUrgentFrame` are
|
||||
deprecated, use `DailyOutputTransportMessageFrame` and
|
||||
`DailyOutputTransportMessageUrgentFrame` respectively instead.
|
||||
|
||||
- `LiveKitTransportMessageFrame` and `LiveKitTransportMessageUrgentFrame` are
|
||||
deprecated, use `LiveKitOutputTransportMessageFrame` and
|
||||
`LiveKitOutputTransportMessageUrgentFrame` respectively instead.
|
||||
|
||||
- `TransportMessageFrame` and `TransportMessageUrgentFrame` are deprecated, use
|
||||
`OutputTransportMessageFrame` and `OutputTransportMessageUrgentFrame`
|
||||
respectively instead.
|
||||
|
||||
- `InputTransportMessageUrgentFrame` is deprecated, use
|
||||
`InputTransportMessageFrame` instead.
|
||||
|
||||
- `DailyUpdateRemoteParticipantsFrame` is deprecated and will be removed in a
|
||||
future version. Instead, create your own custom frame and handle it in the
|
||||
`@transport.output().event_handler("on_after_push_frame")` event handler or a
|
||||
custom processor.
|
||||
|
||||
## Fixed
|
||||
|
||||
- Fixed an issue in `AWSBedrockLLMService` where timeout exceptions weren't
|
||||
being detected.
|
||||
|
||||
- Fixed a `PipelineTask` issue that could prevent the application to exit if
|
||||
`task.cancel()` was called when the task was already finished.
|
||||
|
||||
- Fixed an issue where local SmartTurn was not being ran in a separate thread.
|
||||
|
||||
## [0.0.86] - 2025-09-24
|
||||
|
||||
### Added
|
||||
|
||||
- Added `HeyGenTransport`. This is an integration for HeyGen Interactive
|
||||
Avatar. A video service that handles audio streaming and requests HeyGen to
|
||||
generate avatar video responses. (see https://www.heygen.com/). When used, the
|
||||
Pipecat bot joins the same virtual room as the HeyGen Avatar and the user.
|
||||
|
||||
- Added support to `TwilioFrameSerializer` for `region` and `edge` settings.
|
||||
|
||||
- Added support for using universal `LLMContext` with:
|
||||
|
||||
- `LLMLogObserver`
|
||||
- `GatedLLMContextAggregator` (formerly `GatedOpenAILLMContextAggregator`)
|
||||
- `LangchainProcessor`
|
||||
- `Mem0MemoryService`
|
||||
|
||||
- Added `StrandsAgentProcessor` which allows you to use the Strands Agents
|
||||
framework to build your voice agents.
|
||||
See https://strandsagents.com
|
||||
|
||||
- Added `ElevenLabsSTTService` for speech-to-text transcription.
|
||||
|
||||
- Added a peer connection monitor to the `SmallWebRTCConnection` that
|
||||
automatically disconnects if the connection fails to establish within
|
||||
the timeout (1 minute by default).
|
||||
|
||||
- Added memory cleanup improvements to reduce memory peaks.
|
||||
|
||||
- Added `on_before_process_frame`, `on_after_process_frame`,
|
||||
`on_before_push_frame` and `on_after_push_frame`. These are synchronous events
|
||||
that get called before and after a frame is processed or pushed. Note that
|
||||
these events are synchrnous so they should ideally perform lightweight tasks
|
||||
in order to not block the pipeline. See
|
||||
`examples/foundational/45-before-and-after-events.py`.
|
||||
|
||||
- Added `on_before_leave` synchronous event to `DailyTransport`.
|
||||
|
||||
- Added `on_before_disconnect` synchronous event to `LiveKitTransport`.
|
||||
|
||||
- It is now possible to register synchronous event handlers. By default, all
|
||||
event handlers are executed in a separate task. However, in some cases we want
|
||||
to guarantee order of execution, for example, executing something before
|
||||
disconnecting a transport.
|
||||
|
||||
```python
|
||||
self._register_event_handler("on_event_name", sync=True)
|
||||
```
|
||||
|
||||
- Added support for global location in `GoogleVertexLLMService`. The service now
|
||||
supports both regional locations (e.g., "us-east4") and the "global" location
|
||||
for Vertex AI endpoints. When using "global" location, the service will use
|
||||
`aiplatform.googleapis.com` as the API host instead of the regional format.
|
||||
|
||||
- Added `on_pipeline_finished` event to `PipelineTask`. This event will get
|
||||
fired when the pipeline is done running. This can be the result of a
|
||||
`StopFrame`, `CancelFrame` or `EndFrame`.
|
||||
|
||||
```python
|
||||
@task.event_handler("on_pipeline_finished")
|
||||
async def on_pipeline_finished(task: PipelineTask, frame: Frame):
|
||||
...
|
||||
```
|
||||
|
||||
- Added support for new RTVI `send-text` event, along with the ability to toggle
|
||||
the audio response off (skip tts) while handling the new context.
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated `aiortc` to 1.13.0.
|
||||
|
||||
- Updated `sentry` to 2.38.0.
|
||||
|
||||
- `BaseOutputTransport` methods `write_audio_frame` and `write_video_frame` now
|
||||
return a boolean to indicate if the transport implementation was able to write
|
||||
the given frame or not.
|
||||
|
||||
- Updated Silero VAD model to v6.
|
||||
|
||||
- Updated `livekit` to 1.0.13.
|
||||
|
||||
- `torch` and `torchaudio` are no longer required for running Smart Turn
|
||||
locally. This avoids gigabytes of dependencies being installed.
|
||||
|
||||
- Updated `websockets` dependency to support version 15.0. Removed deprecated
|
||||
usage of `ConnectionClosed.code` and `ConnectionClosed.reason` attributes in
|
||||
`AWSTranscribeSTTService` for compatibility.
|
||||
|
||||
- Refactored `pyproject.toml` to reduce websockets dependency repetition using
|
||||
self-referencing extras. All websockets-dependent services now reference a
|
||||
shared `websockets-base` extra.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `GladiaSTTService`'s `confidence` arg is deprecated. `confidence` is no
|
||||
longer needed to determine which transcription or translation frames to
|
||||
emit.
|
||||
|
||||
- `PipelineTask` events `on_pipeline_stopped`, `on_pipeline_ended` and
|
||||
`on_pipeline_cancelled` are now deprecated. Use `on_pipeline_finished`
|
||||
instead.
|
||||
|
||||
- Support for the RTVI `append-to-context` event, in lieu of the new `send-text`
|
||||
event and making way for future events like `send-image`.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue where the pipeline could freeze if a task cancellation never
|
||||
completed because a third-party library swallowed asyncio.CancelledError. We
|
||||
now apply a timeout to task cancellations to prevent these freezes. If the
|
||||
timeout is reached, the system logs warnings and leaves dangling tasks behind,
|
||||
which can help diagnose where cancellation is being blocked.
|
||||
|
||||
- Fixed an `AudioBufferProcessor` issues that was causing user audio to be
|
||||
missing in stereo recordings causing bot and user overlaps.
|
||||
|
||||
- Fixed a `BaseOutputTransport` issue that could produce large saved
|
||||
`AudioBufferProcessor` files when using an audio mixer.
|
||||
|
||||
- Fixed a `PipelineRunner` issue on Windows where setting up SIGINT and SIGTERM
|
||||
was raising an exception.
|
||||
|
||||
- Fixed an issue where multiple handlers for an event would not run in parallel.
|
||||
|
||||
- Fixed `DailyTransport.sip_call_transfer()` to automatically use the session
|
||||
ID from the `on_dialin_connected` event, when not explicitly provided. Now
|
||||
supports cold transfers (from incoming dial-in calls) by automatically
|
||||
tracking session IDs from connection events.
|
||||
|
||||
- Fixed a memory leak in `SmallWebRTCTransport`. In `aiortc`, when you receive
|
||||
a `MediaStreamTrack` (audio or video), frames are produced asynchronously. If
|
||||
the code never consumes these frames, they are queued in memory, causing a
|
||||
memory leak.
|
||||
|
||||
- Fixed an issue in `AsyncAITTSService`, where `TTSTextFrames` were not being
|
||||
pushed.
|
||||
|
||||
- Fixed an issue that would cause `push_interruption_task_frame_and_wait()` to
|
||||
not wait if a previous interruption had already happened.
|
||||
|
||||
- Fixed a couple of bugs in `ServiceSwitcher`:
|
||||
|
||||
- Using multiple `ServiceSwitcher`s in a pipeline would result in an error.
|
||||
- `ServiceSwitcherFrame`s (such as `ManuallySwitchServiceFrame`s) were having
|
||||
an effect too early, essentially "jumping the queue" in terms of pipeline
|
||||
frame ordering.
|
||||
|
||||
- Fixed a self-cancellation deadlock in `UserIdleProcessor` when returning
|
||||
`False` from an idle callback. The task now terminates naturally instead of
|
||||
attempting to cancel itself.
|
||||
|
||||
- Fixed an issue in `AudioBufferProcessor` where a recording is not created
|
||||
when a bot speaks and user input is blocked.
|
||||
|
||||
- Fixed a `FastAPIWebsocketTransport` and `SmallWebRTCTransport` issue where
|
||||
`on_client_disconnected` would be triggered when the bot ends the
|
||||
conversation. That is, `on_client_disconnected` should only be triggered when
|
||||
the remote client actually disconnects.
|
||||
|
||||
- Fixed an issue in `HeyGenVideoService` where the `BotStartedSpeakingFrame`
|
||||
was blocked from moving through the Pipeline.
|
||||
|
||||
## [0.0.85] - 2025-09-12
|
||||
|
||||
### Added
|
||||
|
||||
- `AzureSTTService` now pushes interim transcriptions.
|
||||
|
||||
- Added `voice_cloning_key` to `GoogleTTSService` to support custom cloned
|
||||
voices.
|
||||
|
||||
- Added `speaking_rate` to `GoogleTTSService.InputParams` to control the
|
||||
speaking rate.
|
||||
|
||||
- Added a `speed` arg to `OpenAITTSService` to control the speed of the voice
|
||||
response.
|
||||
|
||||
- Added `FrameProcessor.push_interruption_task_frame_and_wait()`. Use this
|
||||
method to programatically interrupt the bot from any part of the
|
||||
pipeline. This guarantees that all the processors in the pipeline are
|
||||
interrupted in order (from upstream to downstream). Internally, this works by
|
||||
first pushing an `InterruptionTaskFrame` upstream until it reaches the
|
||||
pipeline task. The pipeline task then generates an `InterruptionFrame`, which
|
||||
flows downstream through all processors. Once the `InterruptionFrame` has
|
||||
reaches the processor waiting for the interruption, the function returns and
|
||||
execution continues after the call. Think of it as sending an upstream request
|
||||
for interruption and waiting until the acknowledgment flows back downstream.
|
||||
|
||||
- Added new base `TaskFrame` (which is a system frame). This is the base class
|
||||
for all task frames (`EndTaskFrame`, `CancelTaskFrame`, etc.) that are meant
|
||||
to be pushed upstream to reach the pipeline task.
|
||||
|
||||
- Expanded support for universal `LLMContext` to the AWS Bedrock LLM service.
|
||||
Using the universal `LLMContext` and associated `LLMContextAggregatorPair` is
|
||||
a pre-requisite for using `LLMSwitcher` to switch between LLMs at runtime.
|
||||
|
||||
- Added new fields to the development runner's `parse_telephony_websocket`
|
||||
method in support of providing dynamic data to a bot.
|
||||
|
||||
- Twilio: Added a new `body` parameter, which parses the websocket message
|
||||
for `customParameters`. Provide data via the `Parameter` nouns in your
|
||||
TwiML to use this feature.
|
||||
- Telnyx & Exotel: Both providers make the `to` and `from` phone numbers
|
||||
available in the websocket messages. You can now access these numbers as
|
||||
`call_data["to"]` and `call_data["from"]`.
|
||||
|
||||
Note: Each telephony provider offers different features. Refer to the
|
||||
corresponding example in `pipecat-examples` to see how to pass custom data
|
||||
to your bot.
|
||||
|
||||
- Added `body` to the `WebsocketRunnerArguments` as an optional parameter.
|
||||
Custom `body` information can be passed from the server into the bot file via
|
||||
the `bot()` method using this new parameter.
|
||||
|
||||
- Added video streaming support to `LiveKitTransport`.
|
||||
|
||||
- Added `OpenAIRealtimeLLMService` and `AzureRealtimeLLMService` which provide
|
||||
access to OpenAI Realtime.
|
||||
|
||||
### Changed
|
||||
|
||||
- `pipeline.tests.utils.run_test()` now allows passing `PipelineParams` instead
|
||||
of individual parameters.
|
||||
|
||||
### Removed
|
||||
|
||||
- Remove `VisionImageRawFrame` in favor of context frames (`LLMContextFrame` or
|
||||
`OpenAILLMContextFrame`).
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `BotInterruptionFrame` is now deprecated, use `InterruptionTaskFrame` instead.
|
||||
|
||||
- `StartInterruptionFrame` is now deprected, use `InterruptionFrame` instead.
|
||||
|
||||
- Deprecate `VisionImageFrameAggregator` because `VisionImageRawFrame` has been
|
||||
removed. See the `12*` examples for the new recommended replacement pattern.
|
||||
|
||||
- `NoisereduceFilter` is now deprecated and will be removed in a future
|
||||
version. Use other audio filters like `KrispFilter` or `AICFilter`.
|
||||
|
||||
- Deprecated `OpenAIRealtimeBetaLLMService` and `AzureRealtimeBetaLLMService`.
|
||||
Use `OpenAIRealtimeLLMService` and `AzureRealtimeLLMService`, respectively.
|
||||
Each service will be removed in an upcoming version, 1.0.0.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed a `BaseOutputTransport` issue that caused incorrect detection of when
|
||||
the bot stopped talking while using an audio mixer.
|
||||
|
||||
- Fixed a `LiveKitTransport` issue where RTVI messages were not properly
|
||||
encoded.
|
||||
|
||||
- Add additional fixups to Mistral context messages to ensure they meet
|
||||
Mistral-specific requirements, avoiding Mistral "invalid request" errors.
|
||||
|
||||
- Fixed `DailyTransport` transcription handling to gracefully handle missing
|
||||
`rawResponse` field in transcription messages, preventing KeyError crashes.
|
||||
|
||||
## [0.0.84] - 2025-09-05
|
||||
|
||||
### Added
|
||||
|
||||
- Add the ability to send DTMF to `LiveKitTransport`.
|
||||
|
||||
- Expanded support for universal `LLMContext` to the Anthropic LLM service.
|
||||
Using the universal `LLMContext` and associated `LLMContextAggregatorPair` is
|
||||
a pre-requisite for using `LLMSwitcher` to switch between LLMs at runtime.
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated `daily-python` to 0.19.9.
|
||||
|
||||
- Restored `DailyTransport`'s native DTMF support using Daily's `send_dtmf()`
|
||||
method instead of generated audio tones.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed a `AWSBedrockLLMService` crash caused by an extra `await`.
|
||||
|
||||
- Fixed a `OpenAIImageGenService` issue where it was not creating
|
||||
`URLImageRawFrame` correctly.
|
||||
|
||||
## [0.0.83] - 2025-09-03
|
||||
|
||||
### Added
|
||||
|
||||
- Added multilingual support for AsyncAI in `AsyncAITTSService` and `AsyncAIHttpTTSService`.
|
||||
|
||||
- New `languages`: `es`, `fr`, `de`, `it`.
|
||||
|
||||
- Added new frames `InputTransportMessageUrgentFrame` and
|
||||
`DailyInputTransportMessageUrgentFrame` for transport messages received from
|
||||
external sources.
|
||||
|
||||
- Added `UserSpeakingFrame`. This will be sent upstream and downstream while VAD
|
||||
detects the user is speaking.
|
||||
|
||||
- Expanded support for universal `LLMContext` to more LLM services. Using the
|
||||
universal `LLMContext` and associated `LLMContextAggregatorPair` is a
|
||||
pre-requisite for using `LLMSwitcher` to switch between LLMs at runtime.
|
||||
Here are the newly-supported services:
|
||||
|
||||
- Azure
|
||||
- Cerebras
|
||||
- Deepseek
|
||||
- Fireworks AI
|
||||
- Google Vertex AI
|
||||
- Grok
|
||||
- Groq
|
||||
- Mistral
|
||||
- NVIDIA NIM
|
||||
- Ollama
|
||||
- OpenPipe
|
||||
- OpenRouter
|
||||
- Perplexity
|
||||
- Qwen
|
||||
- SambaNova
|
||||
- Together.ai
|
||||
|
||||
- Added support for WhatsApp User-initiated Calls.
|
||||
|
||||
- Added new audio filter `AICFilter`, speech enhancement for improving VAD/STT
|
||||
performance, no ONNX dependency.
|
||||
See https://ai-coustics.com/sdk/
|
||||
|
||||
- Added a timeout around cancel input tasks to prevent indefinite hangs when
|
||||
cancellation is swallowed by third-party code.
|
||||
|
||||
- Added `pipecat.extensions.ivr` for automated IVR system navigation with
|
||||
configurable goals and conversation handling. Supports DTMF input, verbal
|
||||
responses, and intelligent menu traversal.
|
||||
|
||||
Basic usage:
|
||||
|
||||
```python
|
||||
from pipecat.extensions.ivr.ivr_navigator import IVRNavigator
|
||||
|
||||
# Create IVR navigator with your goal
|
||||
ivr_navigator = IVRNavigator(
|
||||
llm=llm_service,
|
||||
ivr_prompt="Navigate to billing department to dispute a charge"
|
||||
)
|
||||
|
||||
# Handle different outcomes
|
||||
@ivr_navigator.event_handler("on_conversation_detected")
|
||||
async def on_conversation(processor, conversation_history):
|
||||
# Switch to normal conversation mode
|
||||
pass
|
||||
|
||||
@ivr_navigator.event_handler("on_ivr_status_changed")
|
||||
async def on_ivr_status(processor, status):
|
||||
if status == IVRStatus.COMPLETED:
|
||||
# End pipeline, transfer call, or start bot conversation
|
||||
elif status == IVRStatus.STUCK:
|
||||
# Handle navigation failure
|
||||
```
|
||||
|
||||
- `BaseOutputTransport` now implements `write_dtmf()` by loading DTMF audio and
|
||||
sending it through the transport. This makes sending DTMF generic across all
|
||||
output transports.
|
||||
|
||||
- Added new config parameters to `GladiaSTTService`.
|
||||
- PreProcessingConfig > `audio_enhancer` to enhance audio quality.
|
||||
- CustomVocabularyItem > `pronunciations` and `language` to specify special
|
||||
pronunciations and in which language it will be pronounced.
|
||||
|
||||
### Changed
|
||||
|
||||
- `UserStartedSpeakingFrame` and `UserStoppedSpeakingFrame` are also pushed
|
||||
upstream.
|
||||
|
||||
- `ParallelPipeline` now waits for `CancelFrame` to finish in all branches
|
||||
before pushing it downstream.
|
||||
|
||||
- Added `sip_codecs` to the `DailyRoomSipParams`.
|
||||
|
||||
- Updated the `configure()` function in `pipecat.runner.daily` to include new
|
||||
args to create SIP-enabled rooms. Additionally, added new args to control the
|
||||
room and token expiration durations.
|
||||
|
||||
- `pipecat.frames.frames.KeypadEntry` is deprecated and has been moved to
|
||||
`pipecat.audio.dtmf.types.KeypadEntry`.
|
||||
|
||||
- Updated `RimeTTSService`'s flush_audio message to conform with Rime's official
|
||||
API.
|
||||
|
||||
- Updated the default model for `CerebrasLLMService` to GPT-OSS-120B.
|
||||
|
||||
### Removed
|
||||
|
||||
- Remove `StopInterruptionFrame`. This was a legacy frame that was not being
|
||||
used really anywhere and it didn't provide any useful meaning. It was only
|
||||
pushed after `UserStoppedSpeakingFrame`, so developers can just use
|
||||
`UserStoppedSpeakingFrame`.
|
||||
|
||||
- `DailyTransport.write_dtmf()` has been removed in favor of the generic
|
||||
`BaseOutputTransport.write_dtmf()`.
|
||||
|
||||
- Remove deprecated `DailyTransport.send_dtmf()`.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- Transports have been re-organized.
|
||||
|
||||
```
|
||||
pipecat.transports.network.small_webrtc -> pipecat.transports.smallwebrtc.transport
|
||||
pipecat.transports.network.webrtc_connection -> pipecat.transports.smallwebrtc.connection
|
||||
pipecat.transports.network.websocket_client -> pipecat.transports.websocket.client
|
||||
pipecat.transports.network.websocket_server -> pipecat.transports.websocket.server
|
||||
pipecat.transports.network.fastapi_websocket -> pipecat.transports.websocket.fastapi
|
||||
pipecat.transports.services.daily -> pipecat.transports.daily.transport
|
||||
pipecat.transports.services.helpers.daily_rest -> pipecat.transports.daily.utils
|
||||
pipecat.transports.services.livekit -> pipecat.transports.livekit.transport
|
||||
pipecat.transports.services.tavus -> pipecat.transports.tavus.transport
|
||||
```
|
||||
|
||||
- `pipecat.frames.frames.KeypadEntry` is deprecated use
|
||||
`pipecat.audio.dtmf.types.KeypadEntry` instead.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue where messages received from the transport were always being resent.
|
||||
|
||||
- Fixed `SmallWebRTCTransport` to not use `mid` to decide if the transceiver should
|
||||
be `sendrecv` or not.
|
||||
|
||||
- Fixed an issue where Deepgram swallowed `asyncio.CancelledError` during
|
||||
disconnect, preventing tasks from being cancelled.
|
||||
|
||||
- Fixed an issue where `PipelineTask` was not cleaning up the observers.
|
||||
|
||||
### Performance
|
||||
|
||||
- Reduced latency and improved memory performance in `Mem0MemoryService`.
|
||||
|
||||
## [0.0.82] - 2025-08-28
|
||||
|
||||
### Added
|
||||
|
||||
- Added a new `LLMRunFrame` to trigger an LLM response:
|
||||
|
||||
```python
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
```
|
||||
|
||||
This replaces `OpenAILLMContextFrame`, which you’d previously typically use
|
||||
like this:
|
||||
|
||||
```python
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
```
|
||||
|
||||
Use this way of kicking off your conversation when you’ve already initialized
|
||||
your context and are simply instructing the bot when to go:
|
||||
|
||||
```python
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
# ...
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
```
|
||||
|
||||
Note that if you want to add new messages when kicking off the conversation,
|
||||
you could use `LLMMessagesAppendFrame` with `run_llm=True` instead:
|
||||
|
||||
```python
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([LLMMessagesAppendFrame(new_messages, run_llm=True)])
|
||||
```
|
||||
|
||||
In the rare case you don’t have a context aggregator in your pipeline, then
|
||||
you may continue using a context frame.
|
||||
|
||||
- Added support for switching between audio+text to text-only modes within the
|
||||
same pipeline. This is done by pushing
|
||||
`LLMConfigureOutputFrame(skip_tts=True)` to enter text-only mode, and
|
||||
disabling it to return to audio+text. The LLM will still generate tokens and
|
||||
add them to the context, but they will not be sent to TTS.
|
||||
|
||||
- Added `skip_tts` field to `TextFrame`. This lets a text frame bypass TTS while
|
||||
still being included in the LLM context. Useful for cases like structured text
|
||||
that isn’t meant to be spoken but should still contribute to context.
|
||||
|
||||
- Added a `cancel_timeout_secs` argument to `PipelineTask` which defines how
|
||||
long the pipeline has to complete cancellation. When `PipelineTask.cancel()`
|
||||
is called, a `CancelFrame` is pushed through the pipeline and must reach the
|
||||
end. If it does not reach the end within the specified time, a warning is
|
||||
shown and the wait is aborted.
|
||||
|
||||
- Added a new "universal" (LLM-agnostic) `LLMContext` and accompanying
|
||||
`LLMContextAggregatorPair`, which will eventually replace `OpenAILLMContext`
|
||||
(and the other under-the-hood contexts) and the other context aggregators.
|
||||
The new universal `LLMContext` machinery allows a single context to be shared
|
||||
between different LLMs, enabling runtime LLM switching and scenarios like
|
||||
failover.
|
||||
|
||||
From the developer's point of view, switching to using the new universal
|
||||
context machinery will usually be a matter of going from this:
|
||||
|
||||
```python
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
```
|
||||
|
||||
To this:
|
||||
|
||||
```python
|
||||
context = LLMContext(messages, tools)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
```
|
||||
|
||||
To start, the universal `LLMContext` is supported with the following LLM
|
||||
services:
|
||||
|
||||
- `OpenAILLMService`
|
||||
- `GoogleLLMService`
|
||||
|
||||
- Added a new `LLMSwitcher` class to enable runtime LLM switching, built atop a
|
||||
new generic `ServiceSwitcher`.
|
||||
|
||||
Switchers take a switching strategy. The first available strategy is
|
||||
`ServiceSwitcherStrategyManual`.
|
||||
|
||||
To switch LLMs at runtime, the LLMs must be sharing one instance of the new
|
||||
universal `LLMContext` (see above bullet).
|
||||
|
||||
```python
|
||||
# Instantiate your LLM services
|
||||
llm_openai = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
llm_google = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"))
|
||||
|
||||
# Instantiate a switcher
|
||||
# (ServiceSwitcherStrategyManual defaults to OpenAI, as it's first in the list)
|
||||
llm_switcher = LLMSwitcher(
|
||||
llms=[llm_openai, llm_google], strategy_type=ServiceSwitcherStrategyManual
|
||||
)
|
||||
|
||||
# Create your pipeline
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
context_aggregator.user(),
|
||||
llm_switcher,
|
||||
tts,
|
||||
transport.output(),
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
# ...
|
||||
# Whenever is appropriate, switch LLMs!
|
||||
await task.queue_frames([ManuallySwitchServiceFrame(service=llm_google)])
|
||||
```
|
||||
|
||||
- Added an `LLMService.run_inference()` method to LLM services to enable
|
||||
direct, out-of-band (i.e. out-of-pipeline) inference.
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated `daily-python` to 0.19.8.
|
||||
|
||||
- `PipelineTask` now waits for `StartFrame` to reach the end of the pipeline
|
||||
before pushing any other frames.
|
||||
|
||||
- Updated `CartesiaTTSService` and `CartesiaHttpTTSService` to align with
|
||||
Cartesia's changes for the `speed` parameter. It now takes only an enum of
|
||||
`slow`, `normal`, or `fast`.
|
||||
|
||||
- Added support to `AWSBedrockLLMService` for setting authentication
|
||||
credentials through environment variables.
|
||||
|
||||
- Updated `SarvamTTSService` to use WebSocket streaming for real-time audio
|
||||
generation with multiple Indian languages, with HTTP support still available
|
||||
via `SarvamHttpTTSService`.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an RTVI issue that was causing frames to be pushed before pipeline was
|
||||
properly initialized.
|
||||
|
||||
- Fixed some `get_messages_for_logging()` that were returning a JSON string
|
||||
instead of a list.
|
||||
|
||||
- Fixed a `DailyTransport` issue that prevented DTMF tones from being sent.
|
||||
|
||||
- Fixed a missing import in `SentryMetrics`.
|
||||
|
||||
- Fixed `AWSPollyTTSService` to support AWS credential provider chain (IAM
|
||||
roles, IRSA, instance profiles) instead of requiring explicit environment
|
||||
variables.
|
||||
|
||||
- Fixed a `CartesiaTTSService` issue that was causing the application to hang
|
||||
after Cartesia's 5 minutes timed out.
|
||||
|
||||
- Fixed an issue preventing `SpeechmaticsSTTService` from transcribing audio.
|
||||
|
||||
## [0.0.81] - 2025-08-25
|
||||
|
||||
### Added
|
||||
|
||||
- Added `pipecat.extensions.voicemail`, a module for detecting voicemail vs.
|
||||
live conversation, primarily intended for use in outbound calling scenarios.
|
||||
The voicemail module is optimized for text LLMs only.
|
||||
|
||||
- Added new frames to the `idle_timeout_frames` arg: `TranscriptionFrame`,
|
||||
`InterimTranscriptionFrame`, `UserStartedSpeakingFrame`, and
|
||||
`UserStoppedSpeakingFrame`. These additions serve as indicators of user
|
||||
activity in the pipeline idle detection logic.
|
||||
|
||||
- Allow passing custom pipeline sink and source processors to a
|
||||
`Pipeline`. Pipeline source and sink processors are used to know and control
|
||||
what's coming in and out of a `Pipeline` processor.
|
||||
|
||||
- Added `FrameProcessor.pause_processing_system_frames()` and
|
||||
`FrameProcessor.resume_processing_system_frames()`. These allow to pause and
|
||||
resume the processing of system frame.
|
||||
|
||||
- Added new `on_process_frame()` observer method which makes it possible to know
|
||||
when a frame is being processed.
|
||||
|
||||
- Added new `FrameProcessor.entry_processor()` method. This allows you to access
|
||||
the first non-compound processor in a pipeline.
|
||||
|
||||
- Added `FrameProcessor` properties `processors`, `next` and `previous`.
|
||||
|
||||
- `ElevenLabsTTSService` now supports additional runtime changes to the `model`,
|
||||
`language`, and `voice_settings` parameters.
|
||||
|
||||
- Added `apply_text_normalization` support to `ElevenLabsTTSService` and
|
||||
`ElevenLabsHttpTTSService`.
|
||||
|
||||
- Added `MistralLLMService`, using Mistral's chat completion API.
|
||||
|
||||
- Added the ability to retry executing a chat completion after a timeout period
|
||||
for `OpenAILLMService` and its subclasses, `AnthropicLLMService`, and
|
||||
`AWSBedrockLLMService`. The LLM services accept new args:
|
||||
`retry_timeout_secs` and `retry_on_timeout`. This feature is disabled by
|
||||
default.
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated `daily-python` to 0.19.7.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `FrameProcessor.wait_for_task()` is deprecated. Use `await task` or
|
||||
`await asyncio.wait_for(task, timeout)` instead.
|
||||
|
||||
### Removed
|
||||
|
||||
- Watchdog timers have been removed. They were introduced in 0.0.72 to help
|
||||
diagnose pipeline freezes. Unfortunately, they proved ineffective since they
|
||||
required developers to use Pipecat-specific queues, iterators, and events to
|
||||
correctly reset the timer, which limited their usefulness and added friction.
|
||||
|
||||
- Removed unused `FrameProcessor.set_parent()` and
|
||||
`FrameProcessor.get_parent()`.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue that would cause `PipelineRunner` and `PipelineTask` to not
|
||||
handle external asyncio task cancellation properly.
|
||||
|
||||
- Added `SpeechmaticsSTTService` exception handling on connection and sending.
|
||||
|
||||
- Replaced `asyncio.wait_for()` for `wait_for2.wait_for()` for Python <
|
||||
3.12. because of issues regarding task cancellation (i.e. cancellation is
|
||||
never propagated).
|
||||
See https://bugs.python.org/issue42130
|
||||
|
||||
- Fixed an `AudioBufferProcessor` issues that would cause audio overlap when
|
||||
setting a max buffer size.
|
||||
|
||||
- Fixed an issue where `AsyncAITTSService` had very high latency in responding
|
||||
by adding `force=true` when sending the flush command.
|
||||
|
||||
### Performance
|
||||
|
||||
- Improve `PipelineTask` performance by using direct mode processors and by
|
||||
removing unnecessary tasks.
|
||||
|
||||
- Improve `ParallelPipeline` performance by using direct mode, by not
|
||||
creating a task for each frame and every sub-pipeline and also by removing
|
||||
other unnecessary tasks.
|
||||
|
||||
- `Pipeline` performance improvements by using direct mode.
|
||||
|
||||
### Other
|
||||
|
||||
- Added `14w-function-calling-mistal.py` using `MistralLLMService`.
|
||||
|
||||
- Added `13j-azure-transcription.py` using `AzureSTTService`.
|
||||
|
||||
## [0.0.80] - 2025-08-13
|
||||
|
||||
### Added
|
||||
@@ -622,7 +1500,7 @@ quality and critical bugs impacting `ParallelPipelines` functionality.**
|
||||
- Added `session_token` parameter to `AWSNovaSonicLLMService`.
|
||||
|
||||
- Added Gemini Multimodal Live File API for uploading, fetching, listing, and
|
||||
deleting files. See `26f-gemini-multimodal-live-files-api.py` for example usage.
|
||||
deleting files. See `26f-gemini-live-files-api.py` for example usage.
|
||||
|
||||
### Changed
|
||||
|
||||
@@ -2628,7 +3506,7 @@ stt = DeepgramSTTService(..., live_options=LiveOptions(model="nova-2-general"))
|
||||
- Added the new modalities option and helper function to set Gemini output
|
||||
modalities.
|
||||
|
||||
- Added `examples/foundational/26d-gemini-multimodal-live-text.py` which is
|
||||
- Added `examples/foundational/26d-gemini-live-text.py` which is
|
||||
using Gemini as TEXT modality and using another TTS provider for TTS process.
|
||||
|
||||
### Changed
|
||||
@@ -2815,9 +3693,9 @@ stt = DeepgramSTTService(..., live_options=LiveOptions(model="nova-2-general"))
|
||||
- Added new foundational examples for `GeminiMultimodalLiveLLMService`:
|
||||
|
||||
- `26-gemini-multimodal-live.py`
|
||||
- `26a-gemini-multimodal-live-transcription.py`
|
||||
- `26b-gemini-multimodal-live-video.py`
|
||||
- `26c-gemini-multimodal-live-video.py`
|
||||
- `26a-gemini-live-transcription.py`
|
||||
- `26b-gemini-live-video.py`
|
||||
- `26c-gemini-live-video.py`
|
||||
|
||||
- Added `SimliVideoService`. This is an integration for Simli AI avatars.
|
||||
(see https://www.simli.com)
|
||||
|
||||
336
COMMUNITY_INTEGRATIONS.md
Normal file
@@ -0,0 +1,336 @@
|
||||
# Community Integrations Guide
|
||||
|
||||
Pipecat welcomes community-maintained integrations! As our ecosystem grows, we've established a process for any developer to create and maintain their own service integrations while ensuring discoverability for the Pipecat community.
|
||||
|
||||
## Overview
|
||||
|
||||
**What we support:** Community-maintained integrations that live in separate repositories and are maintained by their authors.
|
||||
|
||||
**What we don't do:** The Pipecat team does not code review, test, or maintain community integrations. We provide guidance and list approved integrations for discoverability.
|
||||
|
||||
**Why this approach:** This allows the community to move quickly while keeping the Pipecat core team focused on maintaining the framework itself.
|
||||
|
||||
## Submitting your Integration
|
||||
|
||||
To be listed as an official community integration, follow these steps:
|
||||
|
||||
### Step 1: Build Your Integration
|
||||
|
||||
Create your integration following the patterns and examples shown in the "Integration Patterns and Examples" section below.
|
||||
|
||||
### Step 2: Set Up Your Repository
|
||||
|
||||
Your repository must contain these components:
|
||||
|
||||
- **Source code** - Complete implementation following Pipecat patterns
|
||||
- **Foundational example** - Single file example showing basic usage (see [Pipecat examples](https://github.com/pipecat-ai/pipecat/tree/main/examples/foundational))
|
||||
- **README.md** - Must include:
|
||||
|
||||
- Introduction and explanation of your integration
|
||||
- Installation instructions
|
||||
- Usage instructions with Pipecat Pipeline
|
||||
- How to run your example
|
||||
- Pipecat version compatibility (e.g., "Tested with Pipecat v0.0.86")
|
||||
- Company attribution: If you work for the company providing the service, please mention this in your README. This helps build confidence that the integration will be actively maintained.
|
||||
|
||||
- **LICENSE** - Permissive license (BSD-2 like Pipecat, or equivalent open source terms)
|
||||
- **Code documentation** - Source code with docstrings (we recommend following [Pipecat's docstring conventions](https://github.com/pipecat-ai/pipecat/blob/main/CONTRIBUTING.md#docstring-conventions))
|
||||
- **Changelog** - Maintain a changelog for version updates
|
||||
|
||||
### Step 3: Join Discord
|
||||
|
||||
Join our Discord: https://discord.gg/pipecat
|
||||
|
||||
### Step 4: Submit for Listing
|
||||
|
||||
Submit a pull request to add your integration to our [Community Integrations documentation page](https://docs.pipecat.ai/server/services/community-integrations).
|
||||
|
||||
**To submit:**
|
||||
|
||||
1. Fork the [Pipecat docs repository](https://github.com/pipecat-ai/docs)
|
||||
2. Edit the file `server/services/community-integrations.mdx`
|
||||
3. Add your integration to the appropriate service category table with:
|
||||
- Service name
|
||||
- Link to your repository
|
||||
- Maintainer GitHub username(s)
|
||||
4. Include a link to your demo video (approx 30-60 seconds) in your PR description showing:
|
||||
- Core functionality of your integration
|
||||
- Handling of an interruption (if applicable to service type)
|
||||
5. Submit your pull request
|
||||
|
||||
Once your PR is submitted, post in the `#community-integrations` Discord channel to let us know.
|
||||
|
||||
## Integration Patterns and Examples
|
||||
|
||||
### STT (Speech-to-Text) Services
|
||||
|
||||
#### Websocket-based Services
|
||||
|
||||
**Base class:** `STTService`
|
||||
|
||||
**Examples:**
|
||||
|
||||
- [DeepgramSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/deepgram/stt.py)
|
||||
- [SpeechmaticsSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/speechmatics/stt.py)
|
||||
|
||||
#### File-based Services
|
||||
|
||||
**Base class:** `SegmentedSTTService`
|
||||
|
||||
**Examples:**
|
||||
|
||||
- [RivaSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/riva/stt.py)
|
||||
- [FalSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/fal/stt.py)
|
||||
|
||||
#### Key requirements:
|
||||
|
||||
- STT services should push `InterimTranscriptionFrames` and `TranscriptionFrames`
|
||||
- If confidence values are available, filter for values >50% confidence
|
||||
|
||||
### LLM (Large Language Model) Services
|
||||
|
||||
#### OpenAI-Compatible Services
|
||||
|
||||
**Base class:** `OpenAILLMService`
|
||||
|
||||
**Examples:**
|
||||
|
||||
- [AzureLLMService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/azure/llm.py)
|
||||
- [GrokLLMService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/grok/llm.py) - Shows overriding the base class where needed
|
||||
|
||||
#### Non-OpenAI Compatible Services
|
||||
|
||||
**Requires:** Full implementation
|
||||
|
||||
**Examples:**
|
||||
|
||||
- [AnthropicLLMService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/anthropic/llm.py)
|
||||
- [GoogleLLMService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/google/llm.py)
|
||||
|
||||
#### Key requirements:
|
||||
|
||||
- **Frame sequence:** Output must follow this frame sequence pattern:
|
||||
|
||||
- `LLMFullResponseStartFrame` - Signals the start of an LLM response
|
||||
- `LLMTextFrame` - Contains LLM content, typically streamed as tokens
|
||||
- `LLMFullResponseEndFrame` - Signals the end of an LLM response
|
||||
|
||||
- **Context aggregation:** Implement context aggregation to collect user and assistant content:
|
||||
- Aggregators come in pairs with a `user()` instance and `assistant()` instance
|
||||
- Context must adhere to the `LLMContext` universal format
|
||||
- Aggregators should handle adding messages, function calls, and images to the context
|
||||
|
||||
### TTS (Text-to-Speech) Services
|
||||
|
||||
#### AudioContextWordTTSService
|
||||
|
||||
**Use for:** Websocket-based services supporting word/timestamp alignment
|
||||
|
||||
**Example:**
|
||||
|
||||
- [CartesiaTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/cartesia/tts.py)
|
||||
|
||||
#### InterruptibleTTSService
|
||||
|
||||
**Use for:** Websocket-based services without word/timestamp alignment, requiring disconnection on interruption
|
||||
|
||||
**Example:**
|
||||
|
||||
- [SarvamTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/sarvam/tts.py)
|
||||
|
||||
#### WordTTSService
|
||||
|
||||
**Use for:** HTTP-based services supporting word/timestamp alignment
|
||||
|
||||
**Example:**
|
||||
|
||||
- [ElevenLabsHttpTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/elevenlabs/tts.py)
|
||||
|
||||
#### TTSService
|
||||
|
||||
**Use for:** HTTP-based services without word/timestamp alignment
|
||||
|
||||
**Example:**
|
||||
|
||||
- [GoogleHttpTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/google/tts.py)
|
||||
|
||||
#### Key requirements:
|
||||
|
||||
- For websocket services, use asyncio WebSocket implementation (required for v13+ support)
|
||||
- Handle idle service timeouts with keepalives
|
||||
- TTSServices push both audio (`TTSRawAudioFrame`) and text (`TTSTextFrame`) frames
|
||||
|
||||
### Telephony Serializers
|
||||
|
||||
Pipecat supports telephony provider integration using websocket connections to exchange MediaStreams. These services use a FrameSerializer to serialize and deserialize inputs from the FastAPIWebsocketTransport.
|
||||
|
||||
**Examples:**
|
||||
|
||||
- [Twilio](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/serializers/twilio.py)
|
||||
- [Telnyx](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/serializers/telnyx.py)
|
||||
|
||||
#### Key requirements:
|
||||
|
||||
- Include hang-up functionality using the provider's native API, ideally using `aiohttp`
|
||||
- Support DTMF (dual-tone multi-frequency) events if the provider supports them:
|
||||
- Deserialize DTMF events from the provider's protocol to `InputDTMFFrame`
|
||||
- Use `KeypadEntry` enum for valid keypad entries (0-9, \*, #, A-D)
|
||||
- Handle invalid DTMF digits gracefully by returning `None`
|
||||
|
||||
### Image Generation Services
|
||||
|
||||
**Base class:** `ImageGenService`
|
||||
|
||||
**Examples:**
|
||||
|
||||
- [FalImageGenService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/fal/image.py)
|
||||
- [GoogleImageGenService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/google/image.py)
|
||||
|
||||
#### Key requirements:
|
||||
|
||||
- Must implement `run_image_gen` method returning an `AsyncGenerator`
|
||||
|
||||
### Vision Services
|
||||
|
||||
Vision services process images and provide analysis such as descriptions, object detection, or visual question answering.
|
||||
|
||||
**Base class:** `VisionService`
|
||||
|
||||
**Example:**
|
||||
|
||||
- [MoondreamVisionService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/moondream/vision.py)
|
||||
|
||||
#### Key requirements:
|
||||
|
||||
- Must implement `run_vision` method that takes an `LLMContext` and returns an `AsyncGenerator[Frame, None]`
|
||||
- The method processes the latest image in the context and yields frames with analysis results
|
||||
- Typically yields `TextFrame` objects containing descriptions or answers
|
||||
|
||||
## Implementation Guidelines
|
||||
|
||||
### Naming Conventions
|
||||
|
||||
- **STT:** `VendorSTTService`
|
||||
- **LLM:** `VendorLLMService`
|
||||
- **TTS:**
|
||||
- Websocket: `VendorTTSService`
|
||||
- HTTP: `VendorHttpTTSService`
|
||||
- **Image:** `VendorImageGenService`
|
||||
- **Vision:** `VendorVisionService`
|
||||
- **Telephony:** `VendorFrameSerializer`
|
||||
|
||||
### Metrics Support
|
||||
|
||||
Enable metrics in your service:
|
||||
|
||||
```python
|
||||
def can_generate_metrics(self) -> bool:
|
||||
"""Check if this service can generate processing metrics.
|
||||
|
||||
Returns:
|
||||
True, as this service supports metrics.
|
||||
"""
|
||||
return True
|
||||
```
|
||||
|
||||
### Dynamic Settings Updates
|
||||
|
||||
STT, LLM, and TTS services support `ServiceUpdateSettingsFrame` for dynamic configuration changes. The base STTService has an `_update_settings()` method that handles settings, and the private `_settings` `Dict` is used to store settings and provide access to the subclass.
|
||||
|
||||
```python
|
||||
async def set_language(self, language: Language):
|
||||
"""Set the recognition language and reconnect.
|
||||
|
||||
Args:
|
||||
language: The language to use for speech recognition.
|
||||
"""
|
||||
logger.info(f"Switching STT language to: [{language}]")
|
||||
self._settings["language"] = language
|
||||
await self._disconnect()
|
||||
await self._connect()
|
||||
```
|
||||
|
||||
Note that, in this example, Deepgram requires the websocket connection be disconnected and reconnected to reinitialize the service with the new value. Consider if your service requires reconnection.
|
||||
|
||||
### Sample Rate Handling
|
||||
|
||||
Sample rates are set via PipelineParams and passed to each frame processor at initialization. The pattern is to _not_ set the sample rate value in the constructor of a given service. Instead, use the `start()` method to initialize sample rates from the frame:
|
||||
|
||||
```python
|
||||
async def start(self, frame: StartFrame):
|
||||
"""Start the service."""
|
||||
await super().start(frame)
|
||||
self._settings["output_format"]["sample_rate"] = self.sample_rate
|
||||
await self._connect()
|
||||
```
|
||||
|
||||
Note that `self.sample_rate` is a `@property` set in the TTSService base class, which provides access to the private sample rate value obtained from the StartFrame.
|
||||
|
||||
### Tracing Decorators
|
||||
|
||||
Use Pipecat's tracing decorators:
|
||||
|
||||
- **STT:** `@traced_stt` - decorate a function that handles `transcript`, `is_final`, `language` as args
|
||||
- **LLM:** `@traced_llm` - decorate the `_process_context()` method
|
||||
- **TTS:** `@traced_tts` - decorate the `run_tts()` method
|
||||
|
||||
## Best Practices
|
||||
|
||||
### Packaging and Distribution
|
||||
|
||||
- Use [uv](https://docs.astral.sh/uv/) for packaging (encouraged)
|
||||
- Consider releasing to PyPI for easier installation
|
||||
- Follow semantic versioning principles
|
||||
- Maintain a changelog
|
||||
|
||||
### HTTP Communication
|
||||
|
||||
For REST-based communication, use aiohttp. Pipecat includes this as a required dependency, so using it prevents adding an additional dependency to your integration.
|
||||
|
||||
### Error Handling
|
||||
|
||||
- Wrap API calls in appropriate try/catch blocks
|
||||
- Handle rate limits and network failures gracefully
|
||||
- Provide meaningful error messages
|
||||
- When errors occur, raise exceptions AND push `ErrorFrame`s to notify the pipeline:
|
||||
|
||||
```python
|
||||
from pipecat.frames.frames import ErrorFrame
|
||||
|
||||
try:
|
||||
# Your API call
|
||||
result = await self._make_api_call()
|
||||
except Exception as e:
|
||||
# Push error frame to pipeline
|
||||
await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
|
||||
# Raise or handle as appropriate
|
||||
raise
|
||||
```
|
||||
|
||||
### Testing
|
||||
|
||||
- Your foundational example serves as a valuable integration-level test
|
||||
- Unit tests are nice to have. As the Pipecat teams provides better guidance, we will encourage unit testing more
|
||||
|
||||
## Disclaimer
|
||||
|
||||
Community integrations are community-maintained and not officially supported by the Pipecat team. Users should evaluate these integrations independently. The Pipecat team reserves the right to remove listings that become unmaintained or problematic.
|
||||
|
||||
## Staying Up to Date
|
||||
|
||||
Pipecat evolves rapidly to support the latest AI technologies and patterns. While we strive to minimize breaking changes, they do occur as the framework matures.
|
||||
|
||||
**We strongly recommend:**
|
||||
|
||||
- Join our Discord at https://discord.gg/pipecat and monitor the `#announcements` channel for release notifications
|
||||
- Follow our changelog: https://github.com/pipecat-ai/pipecat/blob/main/CHANGELOG.md
|
||||
- Test your integration against new Pipecat releases promptly
|
||||
- Update your README with the last tested Pipecat version
|
||||
|
||||
This helps ensure your integration remains compatible and your users have clear expectations about version support.
|
||||
|
||||
## Questions?
|
||||
|
||||
Join our Discord community at https://discord.gg/pipecat and post in the `#community-integrations` channel for guidance and support.
|
||||
|
||||
For additional questions, you can also reach out to us at pipecat-ai@daily.co.
|
||||
@@ -1,5 +1,9 @@
|
||||
## Contributing to Pipecat
|
||||
|
||||
**Want to add a new service integration?**
|
||||
We encourage community-maintained integrations! Please see our [Community Integration Guide](COMMUNITY_INTEGRATIONS.md) for the process and requirements.
|
||||
|
||||
**Want to contribute to Pipecat core?**
|
||||
We welcome contributions of all kinds! Your help is appreciated. Follow these steps to get involved:
|
||||
|
||||
1. **Fork this repository**: Start by forking the Pipecat Documentation repository to your GitHub account.
|
||||
|
||||
126
README.md
@@ -2,7 +2,8 @@
|
||||
<img alt="pipecat" width="300px" height="auto" src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/pipecat.png">
|
||||
</div></h1>
|
||||
|
||||
[](https://pypi.org/project/pipecat-ai)  [](https://codecov.io/gh/pipecat-ai/pipecat) [](https://docs.pipecat.ai) [](https://discord.gg/pipecat)
|
||||
[](https://pypi.org/project/pipecat-ai)  [](https://codecov.io/gh/pipecat-ai/pipecat) [](https://docs.pipecat.ai) [](https://discord.gg/pipecat) [](https://deepwiki.com/pipecat-ai/pipecat)
|
||||
[](https://getmanta.ai/pipecat)
|
||||
|
||||
# 🎙️ Pipecat: Real-Time Voice & Multimodal AI Agents
|
||||
|
||||
@@ -19,8 +20,6 @@
|
||||
- **Business Agents** – customer intake, support bots, guided flows
|
||||
- **Complex Dialog Systems** – design logic with structured conversations
|
||||
|
||||
🧭 Looking to build structured conversations? Check out [Pipecat Flows](https://github.com/pipecat-ai/pipecat-flows) for managing complex conversational states and transitions.
|
||||
|
||||
## 🧠 Why Pipecat?
|
||||
|
||||
- **Voice-first**: Integrates speech recognition, text-to-speech, and conversation handling
|
||||
@@ -28,6 +27,35 @@
|
||||
- **Composable Pipelines**: Build complex behavior from modular components
|
||||
- **Real-Time**: Ultra-low latency interaction with different transports (e.g. WebSockets or WebRTC)
|
||||
|
||||
## 🌐 Pipecat Ecosystem
|
||||
|
||||
### 📱 Client SDKs
|
||||
|
||||
Building client applications? You can connect to Pipecat from any platform using our official SDKs:
|
||||
|
||||
<a href="https://docs.pipecat.ai/client/js/introduction">JavaScript</a> | <a href="https://docs.pipecat.ai/client/react/introduction">React</a> | <a href="https://docs.pipecat.ai/client/react-native/introduction">React Native</a> |
|
||||
<a href="https://docs.pipecat.ai/client/ios/introduction">Swift</a> | <a href="https://docs.pipecat.ai/client/android/introduction">Kotlin</a> | <a href="https://docs.pipecat.ai/client/c++/introduction">C++</a> | <a href="https://github.com/pipecat-ai/pipecat-esp32">ESP32</a>
|
||||
|
||||
### 🧭 Structured conversations
|
||||
|
||||
Looking to build structured conversations? Check out [Pipecat Flows](https://github.com/pipecat-ai/pipecat-flows) for managing complex conversational states and transitions.
|
||||
|
||||
### 🪄 Beautiful UIs
|
||||
|
||||
Want to build beautiful and engaging experiences? Checkout the [Voice UI Kit](https://github.com/pipecat-ai/voice-ui-kit), a collection of components, hooks and templates for building voice AI applications quickly.
|
||||
|
||||
### 🔍 Debugging
|
||||
|
||||
Looking for help debugging your pipeline and processors? Check out [Whisker](https://github.com/pipecat-ai/whisker), a real-time Pipecat debugger.
|
||||
|
||||
### 🖥️ Terminal
|
||||
|
||||
Love terminal applications? Check out [Tail](https://github.com/pipecat-ai/tail), a terminal dashboard for Pipecat.
|
||||
|
||||
### 📺️ Pipecat TV Channel
|
||||
|
||||
Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.youtube.com/playlist?list=PLzU2zoMTQIHjqC3v4q2XVSR3hGSzwKFwH) channel.
|
||||
|
||||
## 🎬 See it in action
|
||||
|
||||
<p float="left">
|
||||
@@ -38,31 +66,20 @@
|
||||
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/moondream-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/moondream-chatbot/image.png" width="400" /></a>
|
||||
</p>
|
||||
|
||||
## 📱 Client SDKs
|
||||
|
||||
You can connect to Pipecat from any platform using our official SDKs:
|
||||
|
||||
| Platform | SDK Repo | Description |
|
||||
| -------- | ------------------------------------------------------------------------------ | -------------------------------- |
|
||||
| Web | [pipecat-client-web](https://github.com/pipecat-ai/pipecat-client-web) | JavaScript and React client SDKs |
|
||||
| iOS | [pipecat-client-ios](https://github.com/pipecat-ai/pipecat-client-ios) | Swift SDK for iOS |
|
||||
| Android | [pipecat-client-android](https://github.com/pipecat-ai/pipecat-client-android) | Kotlin SDK for Android |
|
||||
| C++ | [pipecat-client-cxx](https://github.com/pipecat-ai/pipecat-client-cxx) | C++ client SDK |
|
||||
|
||||
## 🧩 Available services
|
||||
|
||||
| Category | Services |
|
||||
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova) [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova) [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
|
||||
| Serializers | [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx) |
|
||||
| Video | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [ai-coustics](https://docs.pipecat.ai/server/utilities/audio/aic-filter) |
|
||||
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
||||
|
||||
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
|
||||
@@ -115,7 +132,7 @@ You can get started with Pipecat running on your local machine, then move your a
|
||||
### Prerequisites
|
||||
|
||||
**Minimum Python Version:** 3.10
|
||||
**Recommended Python Version:** 3.11-3.12
|
||||
**Recommended Python Version:** 3.12
|
||||
|
||||
### Setup Steps
|
||||
|
||||
@@ -129,7 +146,11 @@ You can get started with Pipecat running on your local machine, then move your a
|
||||
2. Install development and testing dependencies:
|
||||
|
||||
```bash
|
||||
uv sync --group dev --all-extras --no-extra gstreamer --no-extra krisp --no-extra local
|
||||
uv sync --group dev --all-extras \
|
||||
--no-extra gstreamer \
|
||||
--no-extra krisp \
|
||||
--no-extra local \
|
||||
--no-extra ultravox # (ultravox not fully supported on macOS)
|
||||
```
|
||||
|
||||
3. Install the git pre-commit hooks:
|
||||
@@ -138,23 +159,6 @@ You can get started with Pipecat running on your local machine, then move your a
|
||||
uv run pre-commit install
|
||||
```
|
||||
|
||||
### Python 3.13+ Compatibility
|
||||
|
||||
Some features require PyTorch, which doesn't yet support Python 3.13+. Install using:
|
||||
|
||||
```bash
|
||||
uv sync --group dev --all-extras \
|
||||
--no-extra gstreamer \
|
||||
--no-extra krisp \
|
||||
--no-extra local \
|
||||
--no-extra local-smart-turn \
|
||||
--no-extra mlx-whisper \
|
||||
--no-extra moondream \
|
||||
--no-extra ultravox
|
||||
```
|
||||
|
||||
> **Tip:** For full compatibility, use Python 3.12: `uv python pin 3.12`
|
||||
|
||||
> **Note**: Some extras (local, gstreamer) require system dependencies. See documentation if you encounter build errors.
|
||||
|
||||
### Running tests
|
||||
@@ -171,54 +175,6 @@ Run a specific test suite:
|
||||
uv run pytest tests/test_name.py
|
||||
```
|
||||
|
||||
### Setting up your editor
|
||||
|
||||
This project uses strict [PEP 8](https://peps.python.org/pep-0008/) formatting via [Ruff](https://github.com/astral-sh/ruff).
|
||||
|
||||
#### Emacs
|
||||
|
||||
You can use [use-package](https://github.com/jwiegley/use-package) to install [emacs-lazy-ruff](https://github.com/christophermadsen/emacs-lazy-ruff) package and configure `ruff` arguments:
|
||||
|
||||
```elisp
|
||||
(use-package lazy-ruff
|
||||
:ensure t
|
||||
:hook ((python-mode . lazy-ruff-mode))
|
||||
:config
|
||||
(setq lazy-ruff-format-command "ruff format")
|
||||
(setq lazy-ruff-check-command "ruff check --select I"))
|
||||
```
|
||||
|
||||
`ruff` was installed in the `venv` environment described before, so you should be able to use [pyvenv-auto](https://github.com/ryotaro612/pyvenv-auto) to automatically load that environment inside Emacs.
|
||||
|
||||
```elisp
|
||||
(use-package pyvenv-auto
|
||||
:ensure t
|
||||
:defer t
|
||||
:hook ((python-mode . pyvenv-auto-run)))
|
||||
```
|
||||
|
||||
#### Visual Studio Code
|
||||
|
||||
Install the
|
||||
[Ruff](https://marketplace.visualstudio.com/items?itemName=charliermarsh.ruff) extension. Then edit the user settings (_Ctrl-Shift-P_ `Open User Settings (JSON)`) and set it as the default Python formatter, and enable formatting on save:
|
||||
|
||||
```json
|
||||
"[python]": {
|
||||
"editor.defaultFormatter": "charliermarsh.ruff",
|
||||
"editor.formatOnSave": true
|
||||
}
|
||||
```
|
||||
|
||||
#### PyCharm
|
||||
|
||||
`ruff` was installed in the `venv` environment described before, now to enable autoformatting on save, go to `File` -> `Settings` -> `Tools` -> `File Watchers` and add a new watcher with the following settings:
|
||||
|
||||
1. **Name**: `Ruff formatter`
|
||||
2. **File type**: `Python`
|
||||
3. **Working directory**: `$ContentRoot$`
|
||||
4. **Arguments**: `format $FilePath$`
|
||||
5. **Program**: `$PyInterpreterDirectory$/ruff`
|
||||
|
||||
## 🤝 Contributing
|
||||
|
||||
We welcome contributions from the community! Whether you're fixing bugs, improving documentation, or adding new features, here's how you can help:
|
||||
|
||||
5
SECURITY.md
Normal file
@@ -0,0 +1,5 @@
|
||||
# Security Policy
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
Please email `disclosures@daily.co`.
|
||||
@@ -1,10 +0,0 @@
|
||||
# Pipecat Docs
|
||||
|
||||
## [Architecture Overview](architecture.md)
|
||||
|
||||
Learn about the thinking behind the framework's design.
|
||||
|
||||
## [A Frame's Progress](frame-progress.md)
|
||||
|
||||
See how a Frame is processed through a Transport, a Pipeline, and a series of Frame Processors.
|
||||
|
||||
@@ -50,6 +50,7 @@ autodoc_mock_imports = [
|
||||
# Krisp - has build issues on some platforms
|
||||
"pipecat_ai_krisp",
|
||||
"krisp",
|
||||
"krisp_audio",
|
||||
# System-specific GUI libraries
|
||||
"_tkinter",
|
||||
"tkinter",
|
||||
|
||||
@@ -21,6 +21,7 @@ Quick Links
|
||||
Adapters <api/pipecat.adapters>
|
||||
Audio <api/pipecat.audio>
|
||||
Clocks <api/pipecat.clocks>
|
||||
Extensions <api/pipecat.extensions>
|
||||
Frames <api/pipecat.frames>
|
||||
Metrics <api/pipecat.metrics>
|
||||
Observers <api/pipecat.observers>
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
# Pipecat architecture guide
|
||||
|
||||
## Frames
|
||||
|
||||
Frames can represent discrete chunks of data, for instance a chunk of text, a chunk of audio, or an image. They can also be used to as control flow, for instance a frame that indicates that there is no more data available, or that a user started or stopped talking. They can also represent more complex data structures, such as a message array used for an LLM completion.
|
||||
|
||||
## FrameProcessors
|
||||
|
||||
Frame processors operate on frames. Every frame processor implements a `process_frame` method that consumes one frame and produces zero or more frames. Frame processors can do simple transforms, such as concatenating text fragments into sentences, or they can treat frames as input for an AI Service, and emit chat completions based on message arrays or transform text into audio or images.
|
||||
|
||||
## Pipelines
|
||||
|
||||
Pipelines are lists of frame processors linked together. Frame processors can push frames upstream or downstream to their peers. A very simple pipeline might chain an LLM frame processor to a text-to-speech frame processor, with a transport as an output.
|
||||
|
||||
## Transports
|
||||
|
||||
Transports provide input and output frame processors to receive or send frames respectively. For example, the `DailyTransport` does this with a WebRTC session joined to a Daily.co room.
|
||||
@@ -1,46 +0,0 @@
|
||||
# A Frame's Progress
|
||||
|
||||
1. A user says “Hello, LLM” and the cloud transcription service delivers a transcription to the Transport.
|
||||

|
||||
|
||||
2. The Transport places a Transcription frame in the Pipeline’s source queue.
|
||||

|
||||
|
||||
3. The Pipeline passes the Transcription frame to the first Frame Processor in its list, the LLM User Message Aggregator.
|
||||

|
||||
|
||||
4. The LLM User Message Aggregator updates the LLM Context with a `{“user”: “Hello LLM”}` message.
|
||||

|
||||
|
||||
5. The LLM User Message Aggregator yields an LLM Message Frame, containing the updated LLM Context. The Pipeline passes this frame to the LLM Frame Processor.
|
||||

|
||||
|
||||
6. The LLM Frame Processor creates a streaming chat completion based on the LLM context and yields the first chunk of a response, Text Frame with the value “Hi, “. The Pipeline passes this frame to the TTS Frame Processor. The TTS Frame Processor aggregates this response but doesn’t yield anything, yet, because it’s waiting for a full sentence.
|
||||

|
||||
|
||||
7. The LLM Frame Processor yields another Text Frame with the value “there.”. The Pipeline passes this frame to the TTS Frame Processor.
|
||||

|
||||
|
||||
8. The TTS Frame Processor now has a full sentence, so it starts streaming audio based on “Hi, there.” It yields the first chunk of streaming audio as an Audio frame, which the Pipeline passes to the LLM Assistant Message Aggregator.
|
||||

|
||||
|
||||
9. The LLM Assistant Message Aggregator doesn’t do anything with Audio frames, so it immediately yields the frame, unchanged. This is the convention for all Frame Processors: frames that the processor doesn’t process should be immediately yielded.
|
||||

|
||||
|
||||
10. The Pipeline places the first Audio frame in its sink queue, which is being watched by the Transport. Since the frame is now in a queue, the Pipeline can continue processing other frames. Note that the source and sink queues form a sort of “boundary of concurrent processing” between a Pipeline and the outside world. In a Pipeline, Frames are processed sequentially; once a Frame is on a queue it can be processed in parallel with the frames being processed by the Pipeline. TODO: link to a more in-depth section about this.
|
||||

|
||||
|
||||
11. The TTS Frame Processor yields another Audio frame as the Transport transmits the first Audio frame.
|
||||

|
||||
|
||||
12. As before, the LLM Assistant Message Aggregator immediately yields the Audio frame and the Pipeline places the Audio frame in the sink queue.
|
||||

|
||||
|
||||
13. The TTS Frame Processor has no more frames to yield. The LLM Frame Processor emits an LLM Response End Frame, which the Pipeline passes to the TTS Frame Processor.
|
||||

|
||||
|
||||
14. The TTS Frame Processor immediately yields the LLM Response End Frame, so the Pipeline passes it along to the LLM Assistant Message Aggregator. The LLM Assistant Message Aggregator updates the LLM Context with the full response from the LLM. TODO TODO: I realized I forgot that the TSS Frame Processor also yields the Text frames that the LLM emitted so that the LLM Assistant Message Aggregator could accumulate them, arrggh.
|
||||

|
||||
|
||||
15. The system is quiet, and waiting for the next message from the Transport.
|
||||

|
||||
110
docs/frame.md
@@ -1,110 +0,0 @@
|
||||
# Understanding Different Frame Types in the Pipecat System
|
||||
|
||||
In the Pipecat system, frames are used to represent different types of data and control signals that flow through the pipeline. Understanding these frame types is crucial for working with the system effectively. This tutorial will cover the main categories of frames and their specific uses.
|
||||
|
||||
## 1. Base Frame Classes
|
||||
|
||||
### Frame
|
||||
The `Frame` class is the base class for all frames. It includes:
|
||||
- `id`: A unique identifier
|
||||
- `name`: A descriptive name
|
||||
- `pts`: Presentation timestamp (optional)
|
||||
|
||||
### DataFrame
|
||||
`DataFrame` is a subclass of `Frame` and serves as a base for most data-carrying frames.
|
||||
|
||||
## 2. Audio Frames
|
||||
|
||||
### AudioRawFrame
|
||||
Represents a chunk of audio with properties:
|
||||
- `audio`: Raw audio data
|
||||
- `sample_rate`: Audio sample rate
|
||||
- `num_channels`: Number of audio channels
|
||||
|
||||
Subclasses include:
|
||||
- `InputAudioRawFrame`: For audio from input sources
|
||||
- `OutputAudioRawFrame`: For audio to be played by output devices
|
||||
- `TTSAudioRawFrame`: For audio generated by Text-to-Speech services
|
||||
|
||||
## 3. Image Frames
|
||||
|
||||
### ImageRawFrame
|
||||
Represents an image with properties:
|
||||
- `image`: Raw image data
|
||||
- `size`: Image dimensions
|
||||
- `format`: Image format (e.g., JPEG, PNG)
|
||||
|
||||
Subclasses include:
|
||||
- `InputImageRawFrame`: For images from input sources
|
||||
- `OutputImageRawFrame`: For images to be displayed
|
||||
- `UserImageRawFrame`: For images associated with a specific user
|
||||
- `VisionImageRawFrame`: For images with associated text for description
|
||||
- `URLImageRawFrame`: For images with an associated URL
|
||||
|
||||
### SpriteFrame
|
||||
Represents an animated sprite, containing a list of `ImageRawFrame` objects.
|
||||
|
||||
## 4. Text and Transcription Frames
|
||||
|
||||
### TextFrame
|
||||
Represents a chunk of text, used for various purposes in the pipeline.
|
||||
|
||||
### TranscriptionFrame
|
||||
A specialized `TextFrame` for speech transcriptions, including:
|
||||
- `user_id`: ID of the speaking user
|
||||
- `timestamp`: When the transcription was generated
|
||||
- `language`: Detected language of the speech
|
||||
|
||||
### InterimTranscriptionFrame
|
||||
Similar to `TranscriptionFrame`, but for interim (not final) transcriptions.
|
||||
|
||||
## 5. LLM (Language Model) Frames
|
||||
|
||||
### LLMMessagesFrame
|
||||
Contains a list of messages for an LLM service to process.
|
||||
|
||||
### LLMMessagesAppendFrame and LLMMessagesUpdateFrame
|
||||
Used to modify the current context of LLM messages.
|
||||
|
||||
### LLMSetToolsFrame
|
||||
Specifies tools (functions) available for the LLM to use.
|
||||
|
||||
### LLMEnablePromptCachingFrame
|
||||
Controls prompt caching in certain LLMs.
|
||||
|
||||
## 6. System and Control Frames
|
||||
|
||||
### SystemFrame
|
||||
Base class for system-level frames.
|
||||
|
||||
Important system frames include:
|
||||
- `StartFrame`: Initiates a pipeline
|
||||
- `CancelFrame`: Stops a pipeline immediately
|
||||
- `ErrorFrame`: Notifies of errors (with `FatalErrorFrame` for unrecoverable errors)
|
||||
- `EndTaskFrame` and `CancelTaskFrame`: Control pipeline tasks
|
||||
- `StartInterruptionFrame` and `StopInterruptionFrame`: Indicate user speech for interruptions
|
||||
|
||||
### ControlFrame
|
||||
Base class for control-flow frames.
|
||||
|
||||
Notable control frames:
|
||||
- `EndFrame`: Signals the end of a pipeline
|
||||
- `LLMFullResponseStartFrame` and `LLMFullResponseEndFrame`: Bracket LLM responses
|
||||
- `UserStartedSpeakingFrame` and `UserStoppedSpeakingFrame`: Indicate user speech activity
|
||||
- `BotStartedSpeakingFrame` and `BotStoppedSpeakingFrame`: Indicate bot speech activity
|
||||
- `TTSStartedFrame` and `TTSStoppedFrame`: Bracket Text-to-Speech responses
|
||||
|
||||
## 7. Special Purpose Frames
|
||||
|
||||
### MetricsFrame
|
||||
Contains performance metrics data.
|
||||
|
||||
### FunctionCallInProgressFrame and FunctionCallResultFrame
|
||||
Used for handling LLM function (tool) calls.
|
||||
|
||||
### ServiceUpdateSettingsFrame
|
||||
Base class for updating service settings, with specific subclasses for LLM, TTS, and STT services.
|
||||
|
||||
## Conclusion
|
||||
|
||||
Understanding these frame types is essential for working with the Pipecat system. Each frame type serves a specific purpose in the pipeline, whether it's carrying data (like audio or images), controlling the flow of the pipeline, or managing system-level operations. By using the appropriate frame types, you can effectively process and transmit various kinds of information through your pipeline.
|
||||
|
Before Width: | Height: | Size: 98 KiB |
|
Before Width: | Height: | Size: 91 KiB |
|
Before Width: | Height: | Size: 92 KiB |
|
Before Width: | Height: | Size: 92 KiB |
|
Before Width: | Height: | Size: 98 KiB |
|
Before Width: | Height: | Size: 94 KiB |
|
Before Width: | Height: | Size: 94 KiB |
|
Before Width: | Height: | Size: 95 KiB |
|
Before Width: | Height: | Size: 94 KiB |
|
Before Width: | Height: | Size: 96 KiB |
|
Before Width: | Height: | Size: 110 KiB |
|
Before Width: | Height: | Size: 102 KiB |
|
Before Width: | Height: | Size: 111 KiB |
|
Before Width: | Height: | Size: 117 KiB |
|
Before Width: | Height: | Size: 98 KiB |
31
env.example
@@ -1,3 +1,6 @@
|
||||
# AI-COUSTICS
|
||||
AICOUSTICS_LICENSE_KEY=...
|
||||
|
||||
# Anthropic
|
||||
ANTHROPIC_API_KEY=...
|
||||
|
||||
@@ -55,13 +58,19 @@ GOOGLE_CLOUD_PROJECT_ID=...
|
||||
GOOGLE_TEST_CREDENTIALS=...
|
||||
GOOGLE_VERTEX_TEST_CREDENTIALS=...
|
||||
|
||||
# Hume
|
||||
HUME_API_KEY=...
|
||||
|
||||
# LMNT
|
||||
LMNT_API_KEY=...
|
||||
LMNT_VOICE_ID=...
|
||||
|
||||
# Perplexity
|
||||
PERPLEXITY_API_KEY=...
|
||||
|
||||
# PlayHT
|
||||
PLAY_HT_USER_ID=...
|
||||
PLAY_HT_API_KEY=...
|
||||
PLAYHT_USER_ID=...
|
||||
PLAYHT_API_KEY=...
|
||||
|
||||
# OpenAI
|
||||
OPENAI_API_KEY=...
|
||||
@@ -81,6 +90,9 @@ SIMLI_FACE_ID=...
|
||||
# Krisp
|
||||
KRISP_MODEL_PATH=...
|
||||
|
||||
# Krisp Viva
|
||||
KRISP_VIVA_MODEL_PATH=...
|
||||
|
||||
# DeepSeek
|
||||
DEEPSEEK_API_KEY=...
|
||||
|
||||
@@ -140,3 +152,18 @@ SENTRY_DSN=...
|
||||
|
||||
# Heygen
|
||||
HEYGEN_API_KEY=...
|
||||
|
||||
# Mistral
|
||||
MISTRAL_API_KEY=...
|
||||
|
||||
# NVIDIA
|
||||
NVIDIA_API_KEY=...
|
||||
|
||||
# Qwen
|
||||
QWEN_API_KEY=...
|
||||
|
||||
# WhatsApp
|
||||
WHATSAPP_TOKEN=
|
||||
WHATSAPP_WEBHOOK_VERIFICATION_TOKEN=
|
||||
WHATSAPP_PHONE_NUMBER_ID=
|
||||
WHATSAPP_APP_SECRET=
|
||||
@@ -18,8 +18,8 @@ from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.piper.tts import PiperTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
@@ -18,8 +18,8 @@ from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.rime.tts import RimeHttpTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
@@ -17,8 +17,8 @@ from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
@@ -11,13 +11,13 @@ import sys
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import TextFrame
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.runner.livekit import configure
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.transports.services.livekit import LiveKitParams, LiveKitTransport
|
||||
from pipecat.transports.livekit.transport import LiveKitParams, LiveKitTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -50,7 +50,7 @@ async def main():
|
||||
async def on_first_participant_joined(transport, participant_id):
|
||||
await asyncio.sleep(1)
|
||||
await task.queue_frame(
|
||||
TextFrame(
|
||||
TTSSpeakFrame(
|
||||
"Hello there! How are you doing today? Would you like to talk about the weather?"
|
||||
)
|
||||
)
|
||||
|
||||
@@ -17,8 +17,8 @@ from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.riva.tts import FastPitchTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
@@ -9,21 +9,18 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.frames.frames import EndFrame, LLMContextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import (
|
||||
OpenAILLMContext,
|
||||
OpenAILLMContextFrame,
|
||||
)
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -63,7 +60,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
# Register an event handler so we can play the audio when the client joins
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
await task.queue_frames([OpenAILLMContextFrame(OpenAILLMContext(messages)), EndFrame()])
|
||||
await task.queue_frames([LLMContextFrame(LLMContext(messages)), EndFrame()])
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.fal.image import FalImageGenService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.google.image import GoogleImageGenService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
@@ -17,17 +17,22 @@ from fastapi.responses import RedirectResponse
|
||||
from loguru import logger
|
||||
from pipecat_ai_small_webrtc_prebuilt.frontend import SmallWebRTCPrebuiltUI
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.network.small_webrtc import SmallWebRTCTransport
|
||||
from pipecat.transports.network.webrtc_connection import IceServer, SmallWebRTCConnection
|
||||
from pipecat.transports.smallwebrtc.connection import IceServer, SmallWebRTCConnection
|
||||
from pipecat.transports.smallwebrtc.transport import SmallWebRTCTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -55,7 +60,8 @@ async def run_example(webrtc_connection: SmallWebRTCConnection):
|
||||
params=TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
)
|
||||
|
||||
@@ -75,8 +81,8 @@ async def run_example(webrtc_connection: SmallWebRTCConnection):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -103,7 +109,7 @@ async def run_example(webrtc_connection: SmallWebRTCConnection):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -12,15 +12,20 @@ import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.daily import configure
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyLogLevel, DailyParams, DailyTransport
|
||||
from pipecat.transports.daily.transport import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -40,10 +45,10 @@ async def main():
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
)
|
||||
transport.set_log_level(DailyLogLevel.Info)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
@@ -59,8 +64,8 @@ async def main():
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -86,7 +91,7 @@ async def main():
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
|
||||
@@ -12,23 +12,27 @@ import sys
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import (
|
||||
BotInterruptionFrame,
|
||||
TextFrame,
|
||||
InterruptionFrame,
|
||||
TranscriptionFrame,
|
||||
TTSSpeakFrame,
|
||||
UserStartedSpeakingFrame,
|
||||
UserStoppedSpeakingFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.livekit import configure
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.livekit import LiveKitParams, LiveKitTransport
|
||||
from pipecat.transports.livekit.transport import LiveKitParams, LiveKitTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -46,7 +50,8 @@ async def main():
|
||||
params=LiveKitParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
)
|
||||
|
||||
@@ -69,8 +74,8 @@ async def main():
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -98,7 +103,7 @@ async def main():
|
||||
async def on_first_participant_joined(transport, participant_id):
|
||||
await asyncio.sleep(1)
|
||||
await task.queue_frame(
|
||||
TextFrame(
|
||||
TTSSpeakFrame(
|
||||
"Hello there! How are you doing today? Would you like to talk about the weather?"
|
||||
)
|
||||
)
|
||||
@@ -115,7 +120,7 @@ async def main():
|
||||
|
||||
await task.queue_frames(
|
||||
[
|
||||
BotInterruptionFrame(),
|
||||
InterruptionFrame(),
|
||||
UserStartedSpeakingFrame(),
|
||||
TranscriptionFrame(
|
||||
user_id=participant_id,
|
||||
|
||||
@@ -14,6 +14,7 @@ from loguru import logger
|
||||
from pipecat.frames.frames import (
|
||||
DataFrame,
|
||||
Frame,
|
||||
LLMContextFrame,
|
||||
LLMFullResponseStartFrame,
|
||||
TextFrame,
|
||||
)
|
||||
@@ -21,10 +22,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import (
|
||||
OpenAILLMContext,
|
||||
OpenAILLMContextFrame,
|
||||
)
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
@@ -33,7 +32,7 @@ from pipecat.services.cartesia.tts import CartesiaHttpTTSService
|
||||
from pipecat.services.fal.image import FalImageGenService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -156,7 +155,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
}
|
||||
]
|
||||
frames.append(MonthFrame(month=month))
|
||||
frames.append(OpenAILLMContextFrame(OpenAILLMContext(messages)))
|
||||
frames.append(LLMContextFrame(LLMContext(messages)))
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
|
||||
@@ -15,6 +15,7 @@ from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
LLMContextFrame,
|
||||
OutputAudioRawFrame,
|
||||
TextFrame,
|
||||
TTSAudioRawFrame,
|
||||
@@ -24,10 +25,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import (
|
||||
OpenAILLMContext,
|
||||
OpenAILLMContextFrame,
|
||||
)
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia.tts import CartesiaHttpTTSService
|
||||
@@ -140,7 +139,7 @@ async def main():
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
await task.queue_frame(OpenAILLMContextFrame(OpenAILLMContext(messages)))
|
||||
await task.queue_frame(LLMContextFrame(LLMContext(messages)))
|
||||
await task.stop_when_done()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
@@ -9,8 +9,11 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame, MetricsFrame
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import Frame, LLMRunFrame, MetricsFrame
|
||||
from pipecat.metrics.metrics import (
|
||||
LLMUsageMetricsData,
|
||||
ProcessingMetricsData,
|
||||
@@ -20,7 +23,8 @@ from pipecat.metrics.metrics import (
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
@@ -28,8 +32,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -61,17 +65,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -97,8 +104,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -127,7 +134,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -10,17 +10,22 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from PIL import Image
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import (
|
||||
BotStartedSpeakingFrame,
|
||||
BotStoppedSpeakingFrame,
|
||||
Frame,
|
||||
LLMRunFrame,
|
||||
OutputImageRawFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
@@ -28,7 +33,7 @@ from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -65,7 +70,7 @@ class ImageSyncAggregator(FrameProcessor):
|
||||
)
|
||||
)
|
||||
|
||||
await self.push_frame(frame)
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
@@ -78,7 +83,8 @@ transport_params = {
|
||||
video_out_enabled=True,
|
||||
video_out_width=1024,
|
||||
video_out_height=1024,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
@@ -86,7 +92,8 @@ transport_params = {
|
||||
video_out_enabled=True,
|
||||
video_out_width=1024,
|
||||
video_out_height=1024,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -110,8 +117,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
image_sync_aggregator = ImageSyncAggregator(
|
||||
os.path.join(os.path.dirname(__file__), "assets", "speaking.png"),
|
||||
@@ -144,7 +151,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -9,19 +9,24 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaHttpTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -33,17 +38,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -67,8 +75,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -96,7 +104,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -9,19 +9,24 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -32,17 +37,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -66,8 +74,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -95,7 +103,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -9,13 +9,15 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||
@@ -24,8 +26,8 @@ from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.speechmatics.stt import SpeechmaticsSTTService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -122,8 +124,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(aggregation_timeout=0.005),
|
||||
)
|
||||
@@ -154,7 +156,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Say a short hello to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -9,14 +9,19 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||
@@ -25,8 +30,8 @@ from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.speechmatics.stt import SpeechmaticsSTTService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -37,17 +42,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -105,8 +113,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(aggregation_timeout=0.005),
|
||||
)
|
||||
@@ -137,7 +145,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Say a short hello to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -10,19 +10,24 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.soniox.stt import SonioxSTTService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -30,17 +35,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -66,8 +74,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -94,7 +102,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -11,19 +11,24 @@ import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.inworld.tts import InworldTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -34,17 +39,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -66,9 +74,6 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
voice_id="Ashley",
|
||||
model="inworld-tts-1",
|
||||
streaming=streaming, # True: real-time chunks, False: complete audio then playback
|
||||
params=InworldTTSService.InputParams(
|
||||
temperature=0.8,
|
||||
),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
@@ -80,8 +85,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -109,7 +114,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -11,19 +11,24 @@ import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.asyncai.tts import AsyncAIHttpTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -35,17 +40,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -72,8 +80,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -101,7 +109,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -10,19 +10,24 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.asyncai.tts import AsyncAITTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -34,17 +39,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -68,8 +76,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -97,7 +105,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
169
examples/foundational/07ad-interruptible-aicoustics.py
Normal file
@@ -0,0 +1,169 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
|
||||
import datetime
|
||||
import os
|
||||
import wave
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.filters.aic_filter import AICFilter
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# Create audio buffer processor so we can hear the audio fitler results.
|
||||
audiobuffer = AudioBufferProcessor(
|
||||
num_channels=2, # 1 for mono, 2 for stereo (user left, bot right)
|
||||
enable_turn_audio=False, # Enable per-turn audio recording
|
||||
)
|
||||
|
||||
|
||||
def _create_aic_filter() -> AICFilter:
|
||||
license_key = os.getenv("AICOUSTICS_LICENSE_KEY", "")
|
||||
|
||||
return AICFilter(
|
||||
license_key=license_key,
|
||||
enhancement_level=1.0,
|
||||
)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
audio_in_filter=_create_aic_filter(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
audio_in_filter=_create_aic_filter(),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
audio_in_filter=_create_aic_filter(),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
audiobuffer, # write audio data to a file
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
await audiobuffer.start_recording()
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@audiobuffer.event_handler("on_audio_data")
|
||||
async def on_audio_data(buffer, audio, sample_rate, num_channels):
|
||||
# Save or process the composite audio
|
||||
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
filename = f"./conversation_{timestamp}.wav"
|
||||
|
||||
# Create the WAV file
|
||||
with wave.open(filename, "wb") as wf:
|
||||
wf.setnchannels(num_channels)
|
||||
wf.setsampwidth(2) # 16-bit audio
|
||||
wf.setframerate(sample_rate)
|
||||
wf.writeframes(audio)
|
||||
|
||||
logger.info(f"Saved recording to {filename}")
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
138
examples/foundational/07ae-interruptible-hume.py
Normal file
@@ -0,0 +1,138 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIObserver, RTVIProcessor
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.hume.tts import HUME_SAMPLE_RATE, HumeTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = HumeTTSService(
|
||||
api_key=os.getenv("HUME_API_KEY"),
|
||||
# Replace with your Hume voice ID
|
||||
voice_id="f898a92e-685f-43fa-985b-a46920f0650b",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
rtvi = RTVIProcessor(config=RTVIConfig(config=[]))
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
rtvi,
|
||||
stt,
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
audio_out_sample_rate=HUME_SAMPLE_RATE,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
observers=[RTVIObserver(rtvi)],
|
||||
)
|
||||
|
||||
@rtvi.event_handler("on_client_ready")
|
||||
async def on_client_ready(rtvi):
|
||||
await rtvi.set_bot_ready()
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
@@ -15,26 +15,24 @@ from langchain_core.runnables.history import RunnableWithMessageHistory
|
||||
from langchain_openai import ChatOpenAI
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMMessagesUpdateFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantContextAggregator,
|
||||
LLMUserContextAggregator,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import (
|
||||
OpenAILLMContext,
|
||||
)
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.processors.frameworks.langchain import LangchainProcessor
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -55,17 +53,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -100,19 +101,18 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
)
|
||||
lc = LangchainProcessor(history_chain)
|
||||
|
||||
context = OpenAILLMContext()
|
||||
tma_in = LLMUserContextAggregator(context=context)
|
||||
tma_out = LLMAssistantContextAggregator(context=context)
|
||||
context = LLMContext()
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt,
|
||||
tma_in, # User responses
|
||||
context_aggregator.user(), # User responses
|
||||
lc, # Langchain
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
@@ -129,7 +129,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
# An `OpenAILLMContextFrame` will be picked up by the LangchainProcessor using
|
||||
# An `LLMContextFrame` will be picked up by the LangchainProcessor using
|
||||
# only the content of the last message to inject it in the prompt defined
|
||||
# above. So no role is required here.
|
||||
messages = [({"content": "Please briefly introduce yourself to the user."})]
|
||||
|
||||
118
examples/foundational/07c-interruptible-deepgram-flux.py
Normal file
@@ -0,0 +1,118 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContext,
|
||||
LLMContextAggregatorPair,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.flux.stt import DeepgramFluxSTTService
|
||||
from pipecat.services.deepgram.tts import DeepgramTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramFluxSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY"), voice="aura-2-andromeda-en")
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
@@ -12,23 +12,24 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
BotInterruptionFrame,
|
||||
StopInterruptionFrame,
|
||||
InterruptionFrame,
|
||||
LLMRunFrame,
|
||||
UserStartedSpeakingFrame,
|
||||
UserStoppedSpeakingFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.deepgram.tts import DeepgramTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -71,8 +72,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -97,18 +98,18 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
@stt.event_handler("on_speech_started")
|
||||
async def on_speech_started(stt, *args, **kwargs):
|
||||
await task.queue_frames([BotInterruptionFrame(), UserStartedSpeakingFrame()])
|
||||
await task.queue_frames([InterruptionFrame(), UserStartedSpeakingFrame()])
|
||||
|
||||
@stt.event_handler("on_utterance_end")
|
||||
async def on_utterance_end(stt, *args, **kwargs):
|
||||
await task.queue_frames([StopInterruptionFrame(), UserStoppedSpeakingFrame()])
|
||||
await task.queue_frames([UserStoppedSpeakingFrame()])
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -10,19 +10,24 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.deepgram.tts import DeepgramTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -34,17 +39,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -65,8 +73,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -94,7 +102,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -11,19 +11,24 @@ import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.elevenlabs.stt import ElevenLabsSTTService
|
||||
from pipecat.services.elevenlabs.tts import ElevenLabsHttpTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -35,17 +40,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -55,7 +63,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
# Create an HTTP session
|
||||
async with aiohttp.ClientSession() as session:
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
stt = ElevenLabsSTTService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
aiohttp_session=session,
|
||||
)
|
||||
|
||||
tts = ElevenLabsHttpTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
@@ -72,8 +83,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -101,7 +112,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -10,19 +10,24 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -34,17 +39,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -68,8 +76,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -97,7 +105,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -10,19 +10,24 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.playht.tts import PlayHTHttpTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -33,17 +38,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -68,8 +76,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -97,7 +105,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -10,11 +10,16 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
@@ -22,8 +27,8 @@ from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.playht.tts import PlayHTTTSService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -34,17 +39,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -70,8 +78,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -99,7 +107,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -10,19 +10,24 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.azure.llm import AzureLLMService
|
||||
from pipecat.services.azure.stt import AzureSTTService
|
||||
from pipecat.services.azure.tts import AzureTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -33,17 +38,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -74,8 +82,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -103,7 +111,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -10,19 +10,24 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.openai.stt import OpenAISTTService
|
||||
from pipecat.services.openai.tts import OpenAITTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -33,17 +38,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -68,8 +76,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -98,7 +106,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -11,19 +11,24 @@ import time
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openpipe.llm import OpenPipeLLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -34,17 +39,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -73,8 +81,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -102,7 +110,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -11,19 +11,24 @@ import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.xtts.tts import XTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -34,17 +39,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -71,8 +79,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -100,7 +108,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -10,11 +10,16 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
@@ -23,8 +28,8 @@ from pipecat.services.gladia.stt import GladiaSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -35,17 +40,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -77,8 +85,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -106,7 +114,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -10,19 +10,24 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.lmnt.tts import LmntTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -33,17 +38,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -64,8 +72,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -93,7 +101,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -10,20 +10,25 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response import LLMUserAggregatorParams
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.groq.llm import GroqLLMService
|
||||
from pipecat.services.groq.stt import GroqSTTService
|
||||
from pipecat.services.groq.tts import GroqTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -34,17 +39,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -67,8 +75,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(
|
||||
context, user_params=LLMUserAggregatorParams(aggregation_timeout=0.05)
|
||||
)
|
||||
|
||||
@@ -98,7 +106,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
177
examples/foundational/07m-interruptible-aws-strands.py
Normal file
@@ -0,0 +1,177 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMMessagesAppendFrame, LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.processors.frameworks.strands_agents import StrandsAgentsProcessor
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.aws.stt import AWSTranscribeSTTService
|
||||
from pipecat.services.aws.tts import AWSPollyTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
# Strands agent setup
|
||||
try:
|
||||
from strands import Agent, tool
|
||||
from strands.models import BedrockModel
|
||||
except ImportError:
|
||||
logger.warning("Strands not installed. Please install with: pip install strands-agents")
|
||||
Agent = None
|
||||
BedrockModel = None
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def build_agent(model_id: str, max_tokens: int):
|
||||
"""Create and configure a Strands agent for NAB customer service coaching.
|
||||
|
||||
Args:
|
||||
model_id: The AWS Bedrock model ID to use
|
||||
max_tokens: Maximum tokens for the model
|
||||
|
||||
Returns:
|
||||
Configured Strands Agent
|
||||
"""
|
||||
|
||||
@tool
|
||||
def check_weather(location: str) -> str:
|
||||
if location.lower() == "san francisco":
|
||||
return "The weather in San Francisco is sunny and 30 degrees."
|
||||
elif location.lower() == "sydney":
|
||||
return "The weather in Sydney is cloudy and 20 degrees."
|
||||
else:
|
||||
return "I'm not sure about the weather in that location."
|
||||
|
||||
agent = Agent(
|
||||
model=BedrockModel(
|
||||
model_id=model_id,
|
||||
max_tokens=max_tokens,
|
||||
),
|
||||
tools=[check_weather],
|
||||
system_prompt="You are a helpful assistant that can check the weather in a given location.",
|
||||
)
|
||||
|
||||
return agent
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = AWSTranscribeSTTService()
|
||||
|
||||
tts = AWSPollyTTSService(
|
||||
region="us-west-2", # only specific regions support generative TTS
|
||||
voice_id="Joanna",
|
||||
params=AWSPollyTTSService.InputParams(engine="generative", rate="1.1"),
|
||||
)
|
||||
|
||||
# Create Strands agent processor
|
||||
try:
|
||||
agent = build_agent(model_id="us.anthropic.claude-3-5-haiku-20241022-v1:0", max_tokens=8000)
|
||||
llm = StrandsAgentsProcessor(agent=agent)
|
||||
logger.info("Successfully created Strands agent for NAB customer service coaching")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create Strands agent: {e}")
|
||||
raise ValueError(
|
||||
"Unable to create Strands processor. Please ensure you have properly "
|
||||
"installed strands-agents and configured your AWS credentials."
|
||||
)
|
||||
|
||||
# Setup context aggregators for message handling
|
||||
context = LLMContext()
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # Speech-to-text
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # Strands Agents processor
|
||||
tts, # Text-to-speech
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames(
|
||||
[
|
||||
LLMMessagesAppendFrame(
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": f"Greet the user and introduce yourself.",
|
||||
}
|
||||
],
|
||||
run_llm=True,
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
@@ -8,19 +8,24 @@
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.aws.llm import AWSBedrockLLMService
|
||||
from pipecat.services.aws.stt import AWSTranscribeSTTService
|
||||
from pipecat.services.aws.tts import AWSPollyTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -31,17 +36,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -70,8 +78,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -99,7 +107,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
151
examples/foundational/07n-interruptible-gemini-image.py
Normal file
@@ -0,0 +1,151 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""
|
||||
A conversational AI bot using Gemini for both LLM, STT and TTS.
|
||||
|
||||
This example demonstrates how to use Gemini's image generation capabilities.
|
||||
|
||||
Features showcased:
|
||||
- Gemini LLM for conversation and image generation
|
||||
- Google TTS and STT
|
||||
|
||||
Run with:
|
||||
python examples/foundational/07n-interruptible-gemini-image.py
|
||||
|
||||
Make sure to set your environment variables:
|
||||
export GOOGLE_API_KEY=your_api_key_here
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.google.llm import GoogleLLMService
|
||||
from pipecat.services.google.stt import GoogleSTTService
|
||||
from pipecat.services.google.tts import GoogleTTSService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
video_out_enabled=True,
|
||||
video_out_width=1024,
|
||||
video_out_height=1024,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
video_out_enabled=True,
|
||||
video_out_width=1024,
|
||||
video_out_height=1024,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = GoogleSTTService(
|
||||
params=GoogleSTTService.InputParams(languages=Language.EN_US),
|
||||
credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"),
|
||||
)
|
||||
|
||||
tts = GoogleTTSService(
|
||||
voice_id="en-US-Chirp3-HD-Charon",
|
||||
params=GoogleTTSService.InputParams(language=Language.EN_US),
|
||||
credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"),
|
||||
)
|
||||
|
||||
llm = GoogleLLMService(
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
model="gemini-2.5-flash-image",
|
||||
)
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # Gemini TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation with a styled introduction
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
@@ -28,11 +28,16 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.google.llm import GoogleLLMService
|
||||
@@ -40,8 +45,8 @@ from pipecat.services.google.stt import GoogleSTTService
|
||||
from pipecat.services.google.tts import GeminiTTSService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -52,17 +57,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -105,8 +113,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -139,7 +147,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
"content": "Say cheerfully and warmly: Hello! I'm your AI assistant powered by Gemini's new TTS technology. I can speak with different voices, tones, and styles. How can I help you today?",
|
||||
}
|
||||
)
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -10,11 +10,16 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.google.llm import GoogleLLMService
|
||||
@@ -22,8 +27,8 @@ from pipecat.services.google.stt import GoogleSTTService
|
||||
from pipecat.services.google.tts import GoogleTTSService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -34,17 +39,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -77,8 +85,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -106,7 +114,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -10,19 +10,24 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.assemblyai.stt import AssemblyAISTTService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -34,17 +39,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -70,8 +78,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -99,7 +107,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
129
examples/foundational/07p-interruptible-krisp-viva.py
Normal file
@@ -0,0 +1,129 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.filters.krisp_viva_filter import KrispVivaFilter
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.deepgram.tts import DeepgramTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
audio_in_filter=KrispVivaFilter(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
audio_in_filter=KrispVivaFilter(),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
audio_in_filter=KrispVivaFilter(),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY"), voice="aura-helios-en")
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
@@ -11,19 +11,24 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.filters.krisp_filter import KrispFilter
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.deepgram.tts import DeepgramTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -34,19 +39,22 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
audio_in_filter=KrispFilter(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
audio_in_filter=KrispFilter(),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
audio_in_filter=KrispFilter(),
|
||||
),
|
||||
}
|
||||
@@ -68,8 +76,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -97,7 +105,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -11,19 +11,24 @@ import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.rime.tts import RimeHttpTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -35,17 +40,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -73,8 +81,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -102,7 +110,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -10,19 +10,24 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.rime.tts import RimeTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -33,17 +38,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -67,8 +75,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -96,7 +104,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -10,19 +10,24 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.nim.llm import NimLLMService
|
||||
from pipecat.services.riva.stt import RivaSTTService
|
||||
from pipecat.services.riva.tts import RivaTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -33,17 +38,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -64,8 +72,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -93,7 +101,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -12,13 +12,17 @@ from dotenv import load_dotenv
|
||||
from google.genai.types import Content, Part
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
InputAudioRawFrame,
|
||||
InterruptionFrame,
|
||||
LLMFullResponseEndFrame,
|
||||
LLMFullResponseStartFrame,
|
||||
StartInterruptionFrame,
|
||||
LLMRunFrame,
|
||||
TextFrame,
|
||||
TranscriptionFrame,
|
||||
UserStartedSpeakingFrame,
|
||||
@@ -27,7 +31,8 @@ from pipecat.frames.frames import (
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.processors.frame_processor import FrameProcessor
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
@@ -35,8 +40,8 @@ from pipecat.services.google.llm import GoogleLLMService
|
||||
from pipecat.services.google.tts import GoogleTTSService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -92,9 +97,8 @@ class UserAudioCollector(FrameProcessor):
|
||||
elif isinstance(frame, UserStoppedSpeakingFrame):
|
||||
self._user_speaking = False
|
||||
self._context.add_audio_frames_message(audio_frames=self._audio_frames)
|
||||
await self._user_context_aggregator.push_frame(
|
||||
self._user_context_aggregator.get_context_frame()
|
||||
)
|
||||
await self._user_context_aggregator.push_frame(LLMRunFrame())
|
||||
|
||||
elif isinstance(frame, InputAudioRawFrame):
|
||||
if self._user_speaking:
|
||||
self._audio_frames.append(frame)
|
||||
@@ -150,7 +154,7 @@ class TranscriptExtractor(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
class TanscriptionContextFixup(FrameProcessor):
|
||||
class TranscriptionContextFixup(FrameProcessor):
|
||||
def __init__(self, context):
|
||||
super().__init__()
|
||||
self._context = context
|
||||
@@ -181,9 +185,7 @@ class TanscriptionContextFixup(FrameProcessor):
|
||||
|
||||
if isinstance(frame, MagicDemoTranscriptionFrame):
|
||||
self._transcript = frame.text
|
||||
elif isinstance(frame, LLMFullResponseEndFrame) or isinstance(
|
||||
frame, StartInterruptionFrame
|
||||
):
|
||||
elif isinstance(frame, LLMFullResponseEndFrame) or isinstance(frame, InterruptionFrame):
|
||||
self.swap_user_audio()
|
||||
self.add_transcript_back_to_inference_output()
|
||||
self._transcript = ""
|
||||
@@ -198,17 +200,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -240,11 +245,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
audio_collector = UserAudioCollector(context, context_aggregator.user())
|
||||
pull_transcript_out_of_llm_output = TranscriptExtractor(context)
|
||||
fixup_context_messages = TanscriptionContextFixup(context)
|
||||
fixup_context_messages = TranscriptionContextFixup(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -274,7 +279,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -10,19 +10,24 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.fish.tts import FishAudioTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -34,17 +39,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -68,8 +76,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -97,7 +105,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -10,7 +10,10 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
@@ -19,8 +22,8 @@ from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.ultravox.stt import UltravoxSTTService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -44,17 +47,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -11,19 +11,24 @@ import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.neuphonic.tts import NeuphonicHttpTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -35,17 +40,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -72,8 +80,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -101,7 +109,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -10,19 +10,24 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.neuphonic.tts import NeuphonicTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -33,17 +38,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -67,8 +75,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -96,7 +104,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -10,19 +10,24 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.fal.stt import FalSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -34,17 +39,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -70,8 +78,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -99,7 +107,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -11,11 +11,16 @@ import sys
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
@@ -34,7 +39,8 @@ async def main():
|
||||
LocalAudioTransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
)
|
||||
)
|
||||
|
||||
@@ -54,8 +60,8 @@ async def main():
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -78,7 +84,7 @@ async def main():
|
||||
)
|
||||
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
|
||||
@@ -11,11 +11,16 @@ import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
@@ -23,8 +28,8 @@ from pipecat.services.minimax.tts import MiniMaxHttpTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -36,17 +41,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -74,8 +82,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -103,7 +111,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
133
examples/foundational/07z-interruptible-sarvam-http.py
Normal file
@@ -0,0 +1,133 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.sarvam.tts import SarvamHttpTTSService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
# Create an HTTP session
|
||||
async with aiohttp.ClientSession() as session:
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = SarvamHttpTTSService(
|
||||
api_key=os.getenv("SARVAM_API_KEY"),
|
||||
aiohttp_session=session,
|
||||
params=SarvamHttpTTSService.InputParams(language=Language.EN),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt,
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
@@ -5,26 +5,31 @@
|
||||
#
|
||||
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.sarvam.tts import SarvamTTSService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -36,17 +41,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -54,64 +62,64 @@ transport_params = {
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
# Create an HTTP session
|
||||
async with aiohttp.ClientSession() as session:
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = SarvamTTSService(
|
||||
api_key=os.getenv("SARVAM_API_KEY"),
|
||||
aiohttp_session=session,
|
||||
params=SarvamTTSService.InputParams(language=Language.EN),
|
||||
)
|
||||
tts = SarvamTTSService(
|
||||
api_key=os.getenv("SARVAM_API_KEY"),
|
||||
model="bulbul:v2",
|
||||
voice_id="manisha",
|
||||
)
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt,
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt,
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
# Optionally, you can wait for 30 seconds and then change the voice.
|
||||
# await asyncio.sleep(30)
|
||||
# await task.queue_frame(TTSUpdateSettingsFrame(settings={"voice": "anushka"}))
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
|
||||
@@ -6,18 +6,16 @@ from typing import Tuple
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from pipecat.frames.frames import AudioFrame, EndFrame, ImageFrame, TextFrame
|
||||
from pipecat.frames.frames import AudioFrame, EndFrame, ImageFrame, LLMContextFrame, TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.processors.aggregators import SentenceAggregator
|
||||
from pipecat.processors.aggregators.openai_llm_context import (
|
||||
OpenAILLMContext,
|
||||
OpenAILLMContextFrame,
|
||||
)
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.daily import configure
|
||||
from pipecat.services.azure import AzureLLMService, AzureTTSService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.transports.services.daily import DailyTransport
|
||||
from pipecat.transports.daily.transport import DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -83,7 +81,7 @@ async def main():
|
||||
sentence_aggregator = SentenceAggregator()
|
||||
pipeline = Pipeline([llm, sentence_aggregator, tts1], source_queue, sink_queue)
|
||||
|
||||
await source_queue.put(OpenAILLMContextFrame(OpenAILLMContext(messages)))
|
||||
await source_queue.put(LLMContextFrame(LLMContext(messages)))
|
||||
await source_queue.put(EndFrame())
|
||||
await pipeline.run_pipeline()
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
@@ -24,8 +24,8 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport, maybe_capture_participant_camera
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.local.tk import TkLocalTransport, TkTransportParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
@@ -9,12 +9,16 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.processors.filters.wake_check_filter import WakeCheckFilter
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
@@ -22,8 +26,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -34,17 +38,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -70,8 +77,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
hey_robot_filter = WakeCheckFilter(["hey robot", "hey, robot"])
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
|
||||
@@ -10,9 +10,13 @@ import wave
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
LLMContextFrame,
|
||||
LLMFullResponseEndFrame,
|
||||
OutputAudioRawFrame,
|
||||
TTSSpeakFrame,
|
||||
@@ -20,10 +24,8 @@ from pipecat.frames.frames import (
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import (
|
||||
OpenAILLMContext,
|
||||
OpenAILLMContextFrame,
|
||||
)
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.logger import FrameLogger
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
@@ -32,8 +34,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -71,7 +73,7 @@ class InboundSoundEffectWrapper(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, OpenAILLMContextFrame):
|
||||
if isinstance(frame, LLMContextFrame):
|
||||
await self.push_frame(sounds["ding2.wav"])
|
||||
# In case anything else downstream needs it
|
||||
await self.push_frame(frame, direction)
|
||||
@@ -86,17 +88,20 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -120,8 +125,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
out_sound = OutboundSoundEffectWrapper()
|
||||
in_sound = InboundSoundEffectWrapper()
|
||||
fl = FrameLogger("LLM Out")
|
||||
|
||||
@@ -10,13 +10,23 @@ from typing import Optional
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame, TextFrame, TTSSpeakFrame, UserImageRequestFrame
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
LLMContextFrame,
|
||||
TextFrame,
|
||||
TTSSpeakFrame,
|
||||
UserImageRawFrame,
|
||||
UserImageRequestFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import (
|
||||
@@ -28,12 +38,14 @@ from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.moondream.vision import MoondreamService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
class UserImageRequester(FrameProcessor):
|
||||
"""Converts incoming text into requests for user images."""
|
||||
|
||||
def __init__(self, participant_id: Optional[str] = None):
|
||||
super().__init__()
|
||||
self._participant_id = participant_id
|
||||
@@ -46,9 +58,32 @@ class UserImageRequester(FrameProcessor):
|
||||
|
||||
if self._participant_id and isinstance(frame, TextFrame):
|
||||
await self.push_frame(
|
||||
UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
|
||||
UserImageRequestFrame(self._participant_id, context=frame.text),
|
||||
FrameDirection.UPSTREAM,
|
||||
)
|
||||
await self.push_frame(frame, direction)
|
||||
else:
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
class UserImageProcessor(FrameProcessor):
|
||||
"""Converts incoming user images into context frames."""
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, UserImageRawFrame):
|
||||
if frame.request and frame.request.context:
|
||||
context = LLMContext()
|
||||
context.add_image_frame_message(
|
||||
image=frame.image,
|
||||
text=frame.request.context,
|
||||
size=frame.size,
|
||||
format=frame.format,
|
||||
)
|
||||
frame = LLMContextFrame(context)
|
||||
await self.push_frame(frame)
|
||||
else:
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
@@ -59,13 +94,15 @@ transport_params = {
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
video_in_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
video_in_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -78,7 +115,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
# Initialize the image requester without setting the participant ID yet
|
||||
image_requester = UserImageRequester()
|
||||
|
||||
vision_aggregator = VisionImageFrameAggregator()
|
||||
image_processor = UserImageProcessor()
|
||||
|
||||
# If you run into weird description, try with use_cpu=True
|
||||
moondream = MoondreamService()
|
||||
@@ -96,7 +133,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
stt,
|
||||
user_response,
|
||||
image_requester,
|
||||
vision_aggregator,
|
||||
image_processor,
|
||||
moondream,
|
||||
tts,
|
||||
transport.output(),
|
||||
@@ -119,7 +156,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
image_requester.set_participant_id(client_id)
|
||||
|
||||
# Welcome message
|
||||
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me what I see."))
|
||||
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me about what I see."))
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -10,13 +10,23 @@ from typing import Optional
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame, TextFrame, TTSSpeakFrame, UserImageRequestFrame
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
LLMContextFrame,
|
||||
TextFrame,
|
||||
TTSSpeakFrame,
|
||||
UserImageRawFrame,
|
||||
UserImageRequestFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import (
|
||||
@@ -28,12 +38,14 @@ from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.google.llm import GoogleLLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
class UserImageRequester(FrameProcessor):
|
||||
"""Converts incoming text into requests for user images."""
|
||||
|
||||
def __init__(self, participant_id: Optional[str] = None):
|
||||
super().__init__()
|
||||
self._participant_id = participant_id
|
||||
@@ -46,9 +58,32 @@ class UserImageRequester(FrameProcessor):
|
||||
|
||||
if self._participant_id and isinstance(frame, TextFrame):
|
||||
await self.push_frame(
|
||||
UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
|
||||
UserImageRequestFrame(self._participant_id, context=frame.text),
|
||||
FrameDirection.UPSTREAM,
|
||||
)
|
||||
await self.push_frame(frame, direction)
|
||||
else:
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
class UserImageProcessor(FrameProcessor):
|
||||
"""Converts incoming user images into context frames."""
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, UserImageRawFrame):
|
||||
if frame.request and frame.request.context:
|
||||
context = LLMContext()
|
||||
context.add_image_frame_message(
|
||||
image=frame.image,
|
||||
text=frame.request.context,
|
||||
size=frame.size,
|
||||
format=frame.format,
|
||||
)
|
||||
frame = LLMContextFrame(context)
|
||||
await self.push_frame(frame)
|
||||
else:
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
@@ -59,13 +94,15 @@ transport_params = {
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
video_in_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
video_in_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -78,7 +115,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
# Initialize the image requester without setting the participant ID yet
|
||||
image_requester = UserImageRequester()
|
||||
|
||||
vision_aggregator = VisionImageFrameAggregator()
|
||||
image_processor = UserImageProcessor()
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
@@ -96,7 +133,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
stt,
|
||||
user_response,
|
||||
image_requester,
|
||||
vision_aggregator,
|
||||
image_processor,
|
||||
google,
|
||||
tts,
|
||||
transport.output(),
|
||||
@@ -123,7 +160,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
image_requester.set_participant_id(client_id)
|
||||
|
||||
# Welcome message
|
||||
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me what I see."))
|
||||
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me about what I see."))
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||