Compare commits
790 Commits
v0.0.44
...
mb/user-id
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
66b56b1edf | ||
|
|
93743fdcbc | ||
|
|
d43bc51531 | ||
|
|
a4ddf0645f | ||
|
|
15af5e0cd6 | ||
|
|
ddf3a940cb | ||
|
|
195fd43712 | ||
|
|
7c6f45e2bc | ||
|
|
915e3bb3c7 | ||
|
|
80779c48d6 | ||
|
|
c444557965 | ||
|
|
0dd22fb879 | ||
|
|
799777774b | ||
|
|
fdef8a97e2 | ||
|
|
0163247410 | ||
|
|
221e044046 | ||
|
|
532fd31fd7 | ||
|
|
3e178fd46f | ||
|
|
07cb8b7a89 | ||
|
|
e805738d4c | ||
|
|
119bc7e35f | ||
|
|
b9b02845a3 | ||
|
|
3714f12edc | ||
|
|
d2b8171197 | ||
|
|
d0b48c95bb | ||
|
|
73ed0c1ad7 | ||
|
|
c211580fec | ||
|
|
359b55a85e | ||
|
|
8b602a3f62 | ||
|
|
485c231f69 | ||
|
|
8ba3b150eb | ||
|
|
b5f72b4378 | ||
|
|
156fffe6fc | ||
|
|
c9834e2712 | ||
|
|
1e7e307f69 | ||
|
|
67e47a388d | ||
|
|
b8ffd7b16b | ||
|
|
08f1dda94e | ||
|
|
45039e7cde | ||
|
|
e50c76d075 | ||
|
|
dd9f9179cc | ||
|
|
c8da531402 | ||
|
|
25bcaf5c7c | ||
|
|
2d0f3341c3 | ||
|
|
7626d7b04b | ||
|
|
f78520f7d0 | ||
|
|
bb4766455d | ||
|
|
9dacbbbbf4 | ||
|
|
4de192fbb0 | ||
|
|
80b6c28431 | ||
|
|
f471744bca | ||
|
|
d5df4b064b | ||
|
|
06a0e29920 | ||
|
|
64eb8e7262 | ||
|
|
d8386c12dc | ||
|
|
50e798bcd9 | ||
|
|
d1ac7751da | ||
|
|
110ce27c91 | ||
|
|
8b657158ca | ||
|
|
cce14fca97 | ||
|
|
7c051516d8 | ||
|
|
5f402ad741 | ||
|
|
a80b186cea | ||
|
|
c65aaf3b2e | ||
|
|
e815d7776f | ||
|
|
11fc08ef24 | ||
|
|
6f3b0fdf73 | ||
|
|
885bc32827 | ||
|
|
7339cc7197 | ||
|
|
62e9e6bc5a | ||
|
|
329da50338 | ||
|
|
4d307d26d8 | ||
|
|
a74b9354ec | ||
|
|
11381a536f | ||
|
|
b53bc8a879 | ||
|
|
e3d8910814 | ||
|
|
e60a59434f | ||
|
|
5e5de618f3 | ||
|
|
8af92f7923 | ||
|
|
f39e17857e | ||
|
|
5b632de04a | ||
|
|
6bcc196489 | ||
|
|
66375e9dff | ||
|
|
bc839492b6 | ||
|
|
4854645637 | ||
|
|
98e80b7d4a | ||
|
|
8c0ecb89de | ||
|
|
4c8fcb2cfc | ||
|
|
92313d6ce7 | ||
|
|
1ca6ecc46e | ||
|
|
f1947d7d38 | ||
|
|
0852570212 | ||
|
|
874b8bb136 | ||
|
|
da1878537b | ||
|
|
f406d93b0f | ||
|
|
3cd2b90177 | ||
|
|
c4f0c7bcfd | ||
|
|
95e69597f3 | ||
|
|
710baa5e17 | ||
|
|
8c953bac41 | ||
|
|
4c0861ce39 | ||
|
|
12b1e1db9d | ||
|
|
53bfdfd83f | ||
|
|
2a5593afea | ||
|
|
a04a920e54 | ||
|
|
2ce6d92455 | ||
|
|
1ecd5da219 | ||
|
|
e04da334d7 | ||
|
|
7ec351813c | ||
|
|
df6c2fc403 | ||
|
|
71e107725c | ||
|
|
4d0c11fcab | ||
|
|
a8ae79831e | ||
|
|
86516d2415 | ||
|
|
5cd9dab14b | ||
|
|
a3e2e06975 | ||
|
|
e7107b99c5 | ||
|
|
aa1b8879ee | ||
|
|
6802459165 | ||
|
|
6719d1fddc | ||
|
|
a798bf18f2 | ||
|
|
f9d0cca60f | ||
|
|
cb22de0d13 | ||
|
|
7d161cc53b | ||
|
|
255abf46ef | ||
|
|
27579bcb70 | ||
|
|
1295b64879 | ||
|
|
ca57670f65 | ||
|
|
06d0a231b9 | ||
|
|
67af4e619b | ||
|
|
21c274944e | ||
|
|
3239249feb | ||
|
|
216979c377 | ||
|
|
b9db53d3cd | ||
|
|
58bfcc8370 | ||
|
|
6664c492ac | ||
|
|
7634058f97 | ||
|
|
39c6446bdc | ||
|
|
2df7dfcc91 | ||
|
|
c23c9e046c | ||
|
|
9dae753e8c | ||
|
|
40e9ee6d63 | ||
|
|
a342fe732e | ||
|
|
a729834482 | ||
|
|
94a6f1086e | ||
|
|
b42d3a8257 | ||
|
|
12ae980abe | ||
|
|
cdb909958c | ||
|
|
c72c3025f6 | ||
|
|
5cbd719780 | ||
|
|
23d6290672 | ||
|
|
d4e7e11981 | ||
|
|
8057fe3fcf | ||
|
|
3b446234a7 | ||
|
|
768487ffb3 | ||
|
|
2da5620d10 | ||
|
|
af90d65b3b | ||
|
|
c8569a7b67 | ||
|
|
0ecd98c873 | ||
|
|
6f863ba2c6 | ||
|
|
602ca5ebe6 | ||
|
|
787ade41f3 | ||
|
|
bb767831d5 | ||
|
|
bc25a771dc | ||
|
|
f37626f81d | ||
|
|
9d54578e65 | ||
|
|
79afe7ec2a | ||
|
|
2c1fd3c3cc | ||
|
|
b0dd8e03a6 | ||
|
|
ee20e48ef8 | ||
|
|
12b5c5a646 | ||
|
|
7a021cc82d | ||
|
|
3e1ec4a8ee | ||
|
|
a1377b7f1a | ||
|
|
d6335886e2 | ||
|
|
b3b7a5f023 | ||
|
|
5138017b57 | ||
|
|
87670067d7 | ||
|
|
656cd2859e | ||
|
|
15b2cc210c | ||
|
|
4667624b60 | ||
|
|
d07ba80572 | ||
|
|
386ba61483 | ||
|
|
e9d275f270 | ||
|
|
3a4994370c | ||
|
|
6125ea882d | ||
|
|
0a1ce1bb63 | ||
|
|
ab3bcde5f7 | ||
|
|
1368d3db5c | ||
|
|
cd7dec7391 | ||
|
|
a5e985094b | ||
|
|
c04c69df95 | ||
|
|
9c105e25ac | ||
|
|
6901c4fa57 | ||
|
|
469c13c07e | ||
|
|
46871ae686 | ||
|
|
ab5df1a236 | ||
|
|
f5f0de00e4 | ||
|
|
f3dd35bfd9 | ||
|
|
53a5e63990 | ||
|
|
d435a6a6d6 | ||
|
|
59240c7b96 | ||
|
|
6c11753985 | ||
|
|
6fabb7e7d5 | ||
|
|
bce218915e | ||
|
|
627c91f4a6 | ||
|
|
dac4468ca1 | ||
|
|
503eddf7d6 | ||
|
|
1a0f6f2a21 | ||
|
|
43759295cc | ||
|
|
900b95eb92 | ||
|
|
41d07692ca | ||
|
|
dcf6b6e120 | ||
|
|
99dba3b6b9 | ||
|
|
4547609ffb | ||
|
|
9554804a49 | ||
|
|
656cbc35e1 | ||
|
|
6f7c4dd998 | ||
|
|
8b496f8c6f | ||
|
|
15047f5f0a | ||
|
|
e08c24dc41 | ||
|
|
5341739ece | ||
|
|
5b0fc3fa15 | ||
|
|
b7b8e59e9e | ||
|
|
6e0d3aef32 | ||
|
|
1ccc84dd7a | ||
|
|
c9dd906057 | ||
|
|
4f093f11db | ||
|
|
887a9170b2 | ||
|
|
f2e191855a | ||
|
|
78b90e9591 | ||
|
|
17decee788 | ||
|
|
f89014d100 | ||
|
|
3b3e22fe7c | ||
|
|
0df0194cc1 | ||
|
|
8a7a61914e | ||
|
|
1117c21483 | ||
|
|
4211664a77 | ||
|
|
1f8a217cd1 | ||
|
|
b5bd662fe1 | ||
|
|
dd2703317a | ||
|
|
77aeda36eb | ||
|
|
51b235df4b | ||
|
|
4f2aee5fba | ||
|
|
55879bf365 | ||
|
|
7322badbe7 | ||
|
|
42bea578e8 | ||
|
|
2dfdceb9e6 | ||
|
|
5bfcac1f5c | ||
|
|
fb9f72d38b | ||
|
|
146a341a38 | ||
|
|
b9ca667d31 | ||
|
|
5c57cccea3 | ||
|
|
17162258a2 | ||
|
|
da3fb98101 | ||
|
|
6244124d14 | ||
|
|
53049adeea | ||
|
|
4208d2d7c4 | ||
|
|
9f7f74e4d8 | ||
|
|
f14d32d09e | ||
|
|
7351e281e2 | ||
|
|
b94b10f7d6 | ||
|
|
1cc90eb1a3 | ||
|
|
5f7d28bb05 | ||
|
|
204a08ab8f | ||
|
|
141b0a6560 | ||
|
|
ca086a856f | ||
|
|
fe0a7d07bd | ||
|
|
79eb29d614 | ||
|
|
da15c83bab | ||
|
|
d6bac77b3c | ||
|
|
7faa4eb295 | ||
|
|
0e31413851 | ||
|
|
16948b251d | ||
|
|
f3112a8638 | ||
|
|
0293d40e4e | ||
|
|
64038442ed | ||
|
|
facc280599 | ||
|
|
f90cbe8086 | ||
|
|
09a611d44b | ||
|
|
16d7fb2c4a | ||
|
|
643160c960 | ||
|
|
aac907aadb | ||
|
|
8f24ca4e58 | ||
|
|
420ce16807 | ||
|
|
2b8c35c681 | ||
|
|
3d96369193 | ||
|
|
d44b36a07c | ||
|
|
ccc96994e9 | ||
|
|
337d421338 | ||
|
|
752720b4d5 | ||
|
|
f8e69cfa00 | ||
|
|
6d11911d83 | ||
|
|
ec6e71c8ea | ||
|
|
10f854aeba | ||
|
|
d8caf007b0 | ||
|
|
26ea64ef12 | ||
|
|
19c178ebc7 | ||
|
|
3c3fd67d96 | ||
|
|
7bbc0ee8df | ||
|
|
67804edce6 | ||
|
|
ec082d0888 | ||
|
|
8631d71d5a | ||
|
|
62fc95300b | ||
|
|
db7eaed980 | ||
|
|
44c5220104 | ||
|
|
276fd86ecb | ||
|
|
2de0737056 | ||
|
|
b5d5a0e923 | ||
|
|
f3ed12c30b | ||
|
|
e14399727b | ||
|
|
414dcf9810 | ||
|
|
88d530e840 | ||
|
|
af821d8e95 | ||
|
|
133e1aff6c | ||
|
|
def415f476 | ||
|
|
a34d16dabe | ||
|
|
ec7260b237 | ||
|
|
96c6c71d5b | ||
|
|
8e140b2be6 | ||
|
|
a70c785b2e | ||
|
|
f1d3c5e9ad | ||
|
|
346329ba73 | ||
|
|
6089d4255c | ||
|
|
cff9bb6068 | ||
|
|
fdefdc9d68 | ||
|
|
2dd418a38d | ||
|
|
42f5ec20f6 | ||
|
|
5b5125b74c | ||
|
|
be4df5f713 | ||
|
|
5418cdc4d1 | ||
|
|
6c9f5a81dc | ||
|
|
027e360436 | ||
|
|
c219172266 | ||
|
|
7b040be209 | ||
|
|
0d74531f36 | ||
|
|
3341c4f608 | ||
|
|
1e45e55528 | ||
|
|
8086a94e49 | ||
|
|
81895f4a5c | ||
|
|
2846d6f461 | ||
|
|
14f309ce2b | ||
|
|
62ec2f5d1e | ||
|
|
4f9a4ebce2 | ||
|
|
5b478a5c7a | ||
|
|
87c1f2bcce | ||
|
|
b85072637f | ||
|
|
ffe1e023e7 | ||
|
|
9a358b2e86 | ||
|
|
b034c6e247 | ||
|
|
c7ca0eea0f | ||
|
|
29d931cdcd | ||
|
|
ecf0c61af9 | ||
|
|
67e8252d76 | ||
|
|
775aa9493e | ||
|
|
c446f91d4a | ||
|
|
7b6bbc29ed | ||
|
|
9e7ecccf1e | ||
|
|
a618bd3fa6 | ||
|
|
246c825a82 | ||
|
|
9e6fabf110 | ||
|
|
d2dabe4358 | ||
|
|
1db624575f | ||
|
|
a49b4e450b | ||
|
|
9211a37efc | ||
|
|
3f9d39329c | ||
|
|
5a98ae6380 | ||
|
|
8caad15e9b | ||
|
|
9222d9f721 | ||
|
|
5a467a30a3 | ||
|
|
d74e728332 | ||
|
|
8a9fdaf441 | ||
|
|
4b55c73fbe | ||
|
|
7e407e5548 | ||
|
|
ce94421c90 | ||
|
|
49ce3dcb27 | ||
|
|
6ba2dea6f0 | ||
|
|
9ac34ac371 | ||
|
|
a8644d2129 | ||
|
|
3bf15476a4 | ||
|
|
acb3e21432 | ||
|
|
8c9c81d84b | ||
|
|
e51e2f781d | ||
|
|
af6f5ecc86 | ||
|
|
81a18633ca | ||
|
|
397342d0b9 | ||
|
|
d6b3a50108 | ||
|
|
66b08161f1 | ||
|
|
e7fa1cacce | ||
|
|
2d3864ee09 | ||
|
|
0287f06379 | ||
|
|
681c8ffb1d | ||
|
|
676643d558 | ||
|
|
0c4cbc2615 | ||
|
|
e690c98230 | ||
|
|
e0a6c6871c | ||
|
|
29a042a101 | ||
|
|
1cc2da571e | ||
|
|
c6b401b5d1 | ||
|
|
315b7fcc34 | ||
|
|
e9f5fe0f37 | ||
|
|
64faf2218e | ||
|
|
e77a785a7d | ||
|
|
03a269fb87 | ||
|
|
d1a55c6063 | ||
|
|
61d0fa42f1 | ||
|
|
16de1fca9b | ||
|
|
2ad83f23c8 | ||
|
|
422ee98db0 | ||
|
|
3d4620cf95 | ||
|
|
752a6f02b5 | ||
|
|
7e41809ec2 | ||
|
|
e344a73d14 | ||
|
|
d6f480fa50 | ||
|
|
423d6485f8 | ||
|
|
842b3de7f5 | ||
|
|
3cb7829624 | ||
|
|
4292507616 | ||
|
|
98c9759f41 | ||
|
|
bafb867ffc | ||
|
|
b05809be2e | ||
|
|
57d346ce13 | ||
|
|
9001cb17ce | ||
|
|
40cfd9776f | ||
|
|
d68b3ad1b2 | ||
|
|
9b51588b92 | ||
|
|
9a36a4ca32 | ||
|
|
f80a97b545 | ||
|
|
274278e229 | ||
|
|
6b94bcac03 | ||
|
|
969b87dee9 | ||
|
|
bc699735a3 | ||
|
|
00fd381808 | ||
|
|
672b1c6d73 | ||
|
|
f455eb171b | ||
|
|
62c8c90e17 | ||
|
|
28bb448605 | ||
|
|
3d76b30a7c | ||
|
|
0ae8ca0813 | ||
|
|
0935d773f5 | ||
|
|
e0f7a8a9f4 | ||
|
|
2a0e01898f | ||
|
|
9d25e325dd | ||
|
|
37c21426bf | ||
|
|
c467ec8ded | ||
|
|
a367a038f1 | ||
|
|
e45a123eab | ||
|
|
2ecc0e2b13 | ||
|
|
d532e924cd | ||
|
|
36208049dc | ||
|
|
1d11419691 | ||
|
|
05451f882d | ||
|
|
9c22f5b81b | ||
|
|
891f261191 | ||
|
|
13c27eaa1d | ||
|
|
c395d1a234 | ||
|
|
49639c8631 | ||
|
|
695a98a1f7 | ||
|
|
5cbc37472c | ||
|
|
5b6d9a1050 | ||
|
|
332d36475b | ||
|
|
29b67578e3 | ||
|
|
9db3743901 | ||
|
|
496aded031 | ||
|
|
1c1fa0db65 | ||
|
|
a2ad40d7e0 | ||
|
|
2bb3682d88 | ||
|
|
f33f08d667 | ||
|
|
d9bc2b618f | ||
|
|
d5a50e2cad | ||
|
|
7013343bf0 | ||
|
|
728acba8a5 | ||
|
|
3b2c78747c | ||
|
|
44a0acffc8 | ||
|
|
c31d5a4f1a | ||
|
|
52caaa4afb | ||
|
|
115e75d808 | ||
|
|
897e024dd8 | ||
|
|
1cf93f1dcb | ||
|
|
d278996d5b | ||
|
|
322dd0cea1 | ||
|
|
a6a4910931 | ||
|
|
52cefaa9d6 | ||
|
|
42658ecd92 | ||
|
|
a6606a4040 | ||
|
|
d6c944cdc1 | ||
|
|
a5c7b02a73 | ||
|
|
6b9223d87e | ||
|
|
c2135cbe11 | ||
|
|
32495ddd0b | ||
|
|
4301f0abf7 | ||
|
|
5e854c4d03 | ||
|
|
bec46a87ae | ||
|
|
71cf94e936 | ||
|
|
acbecf1c4c | ||
|
|
6095fd342e | ||
|
|
bf40b4936b | ||
|
|
c60dd8d4d2 | ||
|
|
d472aaf391 | ||
|
|
6cc0b74e6c | ||
|
|
23316fbcf9 | ||
|
|
5e22ef251d | ||
|
|
c5324df807 | ||
|
|
3c19a7ae3d | ||
|
|
98c0a6e047 | ||
|
|
f599e160de | ||
|
|
11c5d822f9 | ||
|
|
c3e22f0931 | ||
|
|
9409546f90 | ||
|
|
8ddac0ccd8 | ||
|
|
6e8e7fa19a | ||
|
|
7dfa886669 | ||
|
|
da254c5143 | ||
|
|
e11f128110 | ||
|
|
3aa89fb13a | ||
|
|
f938960d50 | ||
|
|
2981d87bc1 | ||
|
|
106042bbb2 | ||
|
|
d25ddeb962 | ||
|
|
c441baa692 | ||
|
|
676ff14913 | ||
|
|
14893ade92 | ||
|
|
2a39ff69d6 | ||
|
|
e79289454a | ||
|
|
25d02da1b2 | ||
|
|
a36fc370fa | ||
|
|
e4c2f6d4c2 | ||
|
|
97659ca3f0 | ||
|
|
e00c75ce3f | ||
|
|
cf62167f54 | ||
|
|
b3dfeb61c4 | ||
|
|
bd020320cd | ||
|
|
7a55d2d7db | ||
|
|
b7308dca5d | ||
|
|
5301f44b3b | ||
|
|
686165b95a | ||
|
|
4e0ecdd673 | ||
|
|
1b74560f9d | ||
|
|
0c1070433f | ||
|
|
ece2c08cde | ||
|
|
0b9742da9e | ||
|
|
635aa6eb5b | ||
|
|
1ff17cc2b6 | ||
|
|
41ce9e9087 | ||
|
|
4803c54ecf | ||
|
|
5d7b3f2b38 | ||
|
|
23e5b1ec4d | ||
|
|
7f5a8928b8 | ||
|
|
53f675f5cf | ||
|
|
8173e4ce55 | ||
|
|
5445bb0363 | ||
|
|
a2a94724e5 | ||
|
|
a8f9b0635a | ||
|
|
4273a31fd5 | ||
|
|
67f975a2c8 | ||
|
|
d0bca67666 | ||
|
|
966974bfc6 | ||
|
|
f807f233bd | ||
|
|
33108f5798 | ||
|
|
52de825af8 | ||
|
|
5fe679039c | ||
|
|
534f710f5d | ||
|
|
53a11744a8 | ||
|
|
72412cc0c4 | ||
|
|
b77ac07bc6 | ||
|
|
eb6926e0ce | ||
|
|
3b2c9de944 | ||
|
|
27ff868e5a | ||
|
|
57ef525a8e | ||
|
|
d1db54d5fe | ||
|
|
4f88fc0eb8 | ||
|
|
37d1f4c4e1 | ||
|
|
ef9e86d997 | ||
|
|
2d2ef5a417 | ||
|
|
c1fff00586 | ||
|
|
0af2196f50 | ||
|
|
cd42320788 | ||
|
|
70fce52499 | ||
|
|
70b60c0593 | ||
|
|
2d8aa03f31 | ||
|
|
581ff26704 | ||
|
|
335178ff06 | ||
|
|
ee53535f41 | ||
|
|
91ac40307e | ||
|
|
b6c2c1f730 | ||
|
|
b56c789ae4 | ||
|
|
bd435d9e62 | ||
|
|
55a81df84f | ||
|
|
87434460f5 | ||
|
|
958ec42e8d | ||
|
|
d1fff60d1d | ||
|
|
1438e5654a | ||
|
|
1d4be0139a | ||
|
|
f58c3ee322 | ||
|
|
379750df91 | ||
|
|
d125a38737 | ||
|
|
446bb0aeaf | ||
|
|
d839080834 | ||
|
|
9b85d0642b | ||
|
|
230b51a117 | ||
|
|
3a965ca396 | ||
|
|
33fc5bf990 | ||
|
|
a54ca08405 | ||
|
|
4379db43ed | ||
|
|
e915c676aa | ||
|
|
e0a003afa1 | ||
|
|
d5666727ce | ||
|
|
f6d7402530 | ||
|
|
aefe190c9f | ||
|
|
29925a8f21 | ||
|
|
beb3271168 | ||
|
|
b959ac6e1e | ||
|
|
17f4286942 | ||
|
|
ce89bbb16e | ||
|
|
865768039b | ||
|
|
7071482583 | ||
|
|
5353d13151 | ||
|
|
a9e565f355 | ||
|
|
b6f0c16591 | ||
|
|
49005d02f5 | ||
|
|
6d8b885071 | ||
|
|
2eccb33e73 | ||
|
|
22ca4c5a02 | ||
|
|
84f26ac1ca | ||
|
|
74937411e6 | ||
|
|
8aab068ffd | ||
|
|
bd50201ce4 | ||
|
|
6082da284e | ||
|
|
358c458265 | ||
|
|
807dbbe326 | ||
|
|
3c116b291d | ||
|
|
0dd413ee90 | ||
|
|
abc8ede3d7 | ||
|
|
126324ca1b | ||
|
|
602915ae18 | ||
|
|
0ac9e2dd3f | ||
|
|
a9ef5ca95d | ||
|
|
81c476dd4c | ||
|
|
151242d3a0 | ||
|
|
93c6e5098c | ||
|
|
4455b2a428 | ||
|
|
94062592ef | ||
|
|
d2401a76c8 | ||
|
|
e2b1b56e86 | ||
|
|
84bd767312 | ||
|
|
802c29e9e1 | ||
|
|
f83381860c | ||
|
|
4dad1bfe49 | ||
|
|
9ee8896b64 | ||
|
|
5f7a2f66d4 | ||
|
|
76e5f1e847 | ||
|
|
6975340d6c | ||
|
|
0f4cf56418 | ||
|
|
018e51e8a3 | ||
|
|
b050143952 | ||
|
|
98ea1f0791 | ||
|
|
8272c35527 | ||
|
|
e973e82e05 | ||
|
|
d1396bf618 | ||
|
|
8186e423de | ||
|
|
3010addb8b | ||
|
|
029e0d391e | ||
|
|
bf31223577 | ||
|
|
42cc79154f | ||
|
|
05b857006a | ||
|
|
2e57d21b89 | ||
|
|
fa05ec46be | ||
|
|
e3ce619284 | ||
|
|
fb512dcd74 | ||
|
|
ca15d97383 | ||
|
|
b32448e967 | ||
|
|
7e30da6183 | ||
|
|
a6dd2600d2 | ||
|
|
b905b57dfc | ||
|
|
e1a7edfb58 | ||
|
|
1b30b1fc23 | ||
|
|
55026898f6 | ||
|
|
4283557894 | ||
|
|
5ab00e01aa | ||
|
|
fcfc729e83 | ||
|
|
4eacb34fd8 | ||
|
|
3a8aacccf7 | ||
|
|
54c0bf0c70 | ||
|
|
778b05a252 | ||
|
|
f16a416c2b | ||
|
|
1be63bccb8 | ||
|
|
37820ac0df | ||
|
|
8ea80d43f4 | ||
|
|
e117d70a00 | ||
|
|
2ba753272a | ||
|
|
60c8c2f6e9 | ||
|
|
cfb48200c2 | ||
|
|
6d317c6e8e | ||
|
|
158d52856f | ||
|
|
92a69e404f | ||
|
|
d24c6185d8 | ||
|
|
1fd21578a6 | ||
|
|
700db87127 | ||
|
|
6f1310569c | ||
|
|
14cedb0be8 | ||
|
|
fae97f9051 | ||
|
|
d930a46e64 | ||
|
|
2e6b5d1843 | ||
|
|
88362db034 | ||
|
|
f7f0c44c32 | ||
|
|
33553b71d4 | ||
|
|
be8ca505cd | ||
|
|
e957cce422 | ||
|
|
418a13a4ec | ||
|
|
fc445c0a1f | ||
|
|
f0c65468ed | ||
|
|
ce6a2bdcf7 | ||
|
|
673542e235 | ||
|
|
e032b0b70a | ||
|
|
e39f7e965b | ||
|
|
d26751e968 | ||
|
|
e0ca4a9c23 | ||
|
|
801e52c095 | ||
|
|
a46eaa838b | ||
|
|
7c432499db | ||
|
|
8d75fcc9f0 | ||
|
|
61d73f81ae | ||
|
|
951255def9 | ||
|
|
bf5a7c3562 | ||
|
|
e556f34094 | ||
|
|
ccc3691620 | ||
|
|
5321affda7 | ||
|
|
e5ad8dc67b | ||
|
|
46927805bc | ||
|
|
b6b1ef0a40 | ||
|
|
e62f762382 | ||
|
|
dbfda14342 | ||
|
|
fee85418cd | ||
|
|
015faa3dbd | ||
|
|
1dbf4ff27d | ||
|
|
4f1b2dce9b | ||
|
|
5640bd9447 | ||
|
|
ee5ae0d631 | ||
|
|
4b8a4b86fe | ||
|
|
3556c9ce0f | ||
|
|
f971dbe027 | ||
|
|
3815e9dec3 | ||
|
|
320f622255 | ||
|
|
be4bdabdf4 | ||
|
|
1fa52b62aa | ||
|
|
4f66e5d55f | ||
|
|
3502509d3e | ||
|
|
d71ea1c0e0 | ||
|
|
07712cdb16 | ||
|
|
13f232bafc | ||
|
|
9dd3354b89 | ||
|
|
8c006c24a3 | ||
|
|
4550545528 | ||
|
|
020f371ecb | ||
|
|
f3c0767c81 | ||
|
|
c9318ecd5c | ||
|
|
12eb9437c1 | ||
|
|
71c8c0dcdb | ||
|
|
8108423742 | ||
|
|
d67e08be4d | ||
|
|
d3f4ac61b6 | ||
|
|
c6d28bb0db | ||
|
|
2a37b2459a | ||
|
|
d1000f2fe4 | ||
|
|
e2d7af4b62 | ||
|
|
da3810f1a2 | ||
|
|
eb21597d1a | ||
|
|
e3eea0c02f | ||
|
|
45606e177c | ||
|
|
197d7b3e2b | ||
|
|
d4ec6827ce | ||
|
|
e31d1152db | ||
|
|
bb48a81103 | ||
|
|
55f1ae2564 | ||
|
|
280691b1b3 | ||
|
|
93c9e219ce | ||
|
|
edd44cc181 | ||
|
|
4075b19f7c | ||
|
|
bb14918a33 | ||
|
|
2aee8a12f8 | ||
|
|
5760fadb44 | ||
|
|
af5a7e9092 | ||
|
|
8d9a7486d1 | ||
|
|
00d0f9ae48 | ||
|
|
ec98a13a08 | ||
|
|
b999b76f70 | ||
|
|
b64dbe7bb4 | ||
|
|
0e69625a01 | ||
|
|
4e0823fced | ||
|
|
40af3571f0 |
48
.github/workflows/android.yaml
vendored
Normal file
48
.github/workflows/android.yaml
vendored
Normal file
@@ -0,0 +1,48 @@
|
||||
name: android
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- "examples/simple-chatbot/client/android/**"
|
||||
pull_request:
|
||||
branches:
|
||||
- "**"
|
||||
paths:
|
||||
- "examples/simple-chatbot/client/android/**"
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
sdk_git_ref:
|
||||
type: string
|
||||
description: "Which git ref of the app to build"
|
||||
|
||||
concurrency:
|
||||
group: build-android-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
sdk:
|
||||
name: "Simple chatbot demo"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ github.event.inputs.sdk_git_ref || github.ref }}
|
||||
|
||||
- name: "Install Java"
|
||||
uses: actions/setup-java@v4
|
||||
with:
|
||||
distribution: 'temurin'
|
||||
java-version: '17'
|
||||
|
||||
- name: Build demo app
|
||||
working-directory: examples/simple-chatbot/client/android
|
||||
run: ./gradlew :simple-chatbot-client:assembleDebug
|
||||
|
||||
- name: Upload demo APK
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: Simple Chatbot Android Client
|
||||
path: examples/simple-chatbot/client/android/simple-chatbot-client/build/outputs/apk/debug/simple-chatbot-client-debug.apk
|
||||
9
.github/workflows/format.yaml
vendored
9
.github/workflows/format.yaml
vendored
@@ -35,7 +35,12 @@ jobs:
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r dev-requirements.txt
|
||||
- name: Ruff formatter
|
||||
id: ruff
|
||||
id: ruff-format
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
ruff format --config line-length=100 --diff --exclude "*_pb2.py"
|
||||
ruff format --diff
|
||||
- name: Ruff import linter
|
||||
id: ruff-check
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
ruff check --select I
|
||||
|
||||
10
.gitignore
vendored
10
.gitignore
vendored
@@ -4,6 +4,7 @@ __pycache__/
|
||||
*~
|
||||
venv
|
||||
.venv
|
||||
/.idea
|
||||
#*#
|
||||
|
||||
# Distribution / packaging
|
||||
@@ -27,4 +28,11 @@ share/python-wheels/
|
||||
MANIFEST
|
||||
.DS_Store
|
||||
.env
|
||||
fly.toml
|
||||
fly.toml
|
||||
|
||||
# Example files
|
||||
pipecat/examples/twilio-chatbot/templates/streams.xml
|
||||
|
||||
# Documentation
|
||||
docs/api/_build/
|
||||
docs/api/api
|
||||
7
.pre-commit-config.yaml
Normal file
7
.pre-commit-config.yaml
Normal file
@@ -0,0 +1,7 @@
|
||||
repos:
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: ruff-format-hook
|
||||
name: Check ruff formatting
|
||||
entry: sh scripts/pre-commit.sh
|
||||
language: system
|
||||
36
.readthedocs.yaml
Normal file
36
.readthedocs.yaml
Normal file
@@ -0,0 +1,36 @@
|
||||
version: 2
|
||||
|
||||
build:
|
||||
os: ubuntu-22.04
|
||||
tools:
|
||||
python: '3.12'
|
||||
apt_packages:
|
||||
- portaudio19-dev
|
||||
- python3-dev
|
||||
- libasound2-dev
|
||||
jobs:
|
||||
pre_build:
|
||||
- python -m pip install --upgrade pip
|
||||
- pip install wheel setuptools
|
||||
post_build:
|
||||
- echo "Build completed"
|
||||
|
||||
sphinx:
|
||||
configuration: docs/api/conf.py
|
||||
fail_on_warning: false
|
||||
|
||||
python:
|
||||
install:
|
||||
- requirements: docs/api/requirements.txt
|
||||
- method: pip
|
||||
path: .
|
||||
|
||||
search:
|
||||
ranking:
|
||||
api/*: 5
|
||||
getting-started/*: 4
|
||||
guides/*: 3
|
||||
|
||||
submodules:
|
||||
include: all
|
||||
recursive: true
|
||||
550
CHANGELOG.md
550
CHANGELOG.md
@@ -5,6 +5,556 @@ All notable changes to **Pipecat** will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
|
||||
- Introduced pipeline frame observers. Observers can view all the frames that go
|
||||
through the pipeline without the need to inject processors in the
|
||||
pipeline. This can be useful, for example, to implement frame loggers or
|
||||
debuggers among other things.
|
||||
|
||||
- Added `30-observer.py` to show how to add an Observer to a pipeline for
|
||||
debugging.
|
||||
|
||||
- Added `OpenRouter` for OpenRouter integration with an OpenAI-compatible
|
||||
interface. Added foundational example `14m-function-calling-openrouter.py`.
|
||||
|
||||
- Added a new `WebsocketService` based class for TTS services, containing
|
||||
base functions and retry logic.
|
||||
|
||||
- Added `DeepSeekLLMService` for DeepSeek integration with an OpenAI-compatible
|
||||
interface. Added foundational example `14l-function-calling-deepseek.py`.
|
||||
|
||||
- Added `FunctionCallResultProperties` dataclass to provide a structured way to
|
||||
control function call behavior, including:
|
||||
|
||||
- `run_llm`: Controls whether to trigger LLM completion
|
||||
- `on_context_updated`: Optional callback triggered after context update
|
||||
|
||||
- Added a new foundational example `07e-interruptible-playht-http.py` for easy
|
||||
testing of `PlayHTHttpTTSService`.
|
||||
|
||||
- Added support for Google TTS Journey voices in `GoogleTTSService`.
|
||||
|
||||
- Added `29-livekit-audio-chat.py`, as a new foundational example for
|
||||
`LiveKitTransportLayer`.
|
||||
|
||||
- Added `enable_prejoin_ui`, `max_participants` and `start_video_off` params
|
||||
to `DailyRoomProperties`.
|
||||
|
||||
- Added `session_timeout` to `FastAPIWebsocketTransport` and
|
||||
`WebsocketServerTransport` for configuring session timeouts (in
|
||||
seconds). Triggers `on_session_timeout` for custom timeout handling.
|
||||
See [examples/websocket-server/bot.py](https://github.com/pipecat-ai/pipecat/blob/main/examples/websocket-server/bot.py).
|
||||
|
||||
- Added the new modalities option and helper function to set Gemini output
|
||||
modalities.
|
||||
|
||||
- Added `examples/foundational/26d-gemini-multimodal-live-text.py` which is
|
||||
using Gemini with the TEXT modality and another TTS provider for speech synthesis.
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated the `17-detect-user-idle.py` to show how to use the `retry_count`.
|
||||
|
||||
- Enhanced `UserIdleProcessor` with retry counting functionality. Callbacks now
|
||||
support an optional `retry_count` parameter to implement escalating responses
|
||||
to user inactivity.
|
||||
|
||||
- Modified `UserIdleProcessor` to start monitoring only after first
|
||||
conversation activity (`UserStartedSpeakingFrame` or
|
||||
`BotStartedSpeakingFrame`) instead of immediately.
|
||||
|
||||
- Modified `OpenAIAssistantContextAggregator` to support controlled completions
|
||||
and to emit context update callbacks via `FunctionCallResultProperties`.
|
||||
|
||||
- Added `aws_session_token` to the `PollyTTSService`.
|
||||
|
||||
- Changed the default model for `PlayHTHttpTTSService` to `Play3.0-mini-http`.
|
||||
|
||||
- `api_key`, `aws_access_key_id` and `region` are no longer required parameters
|
||||
for the `PollyTTSService` (`AWSTTSService`).
|
||||
|
||||
- Added `session_timeout` example in `examples/websocket-server/bot.py` to
|
||||
handle session timeout event.
|
||||
|
||||
- Changed `InputParams` in
|
||||
`src/pipecat/services/gemini_multimodal_live/gemini.py` to support different
|
||||
modalities.
|
||||
|
||||
- Changed `DeepgramSTTService` to send `finalize` event whenever VAD detects
|
||||
`UserStoppedSpeakingFrame`. This helps in faster transcriptions and clearing
|
||||
the `Deepgram` audio buffer.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed `UserIdleProcessor` not properly propagating `EndFrame`s through the
|
||||
pipeline.
|
||||
|
||||
- Fixed an issue where websocket based TTS services could incorrectly terminate
|
||||
their connection due to a retry counter not resetting.
|
||||
|
||||
- Fixed a `PipelineTask` issue that would cause a dangling task after stopping
|
||||
the pipeline with an `EndFrame`.
|
||||
|
||||
- Fixed an import issue for `PlayHTHttpTTSService`.
|
||||
|
||||
- Fixed an issue where languages couldn't be used with the `PlayHTHttpTTSService`.
|
||||
|
||||
- Fixed an issue where `OpenAIRealtimeBetaLLMService` audio chunks were hitting
|
||||
an error when truncating audio content.
|
||||
|
||||
- Fixed an issue where setting the voice and model for `RimeHttpTTSService`
|
||||
wasn't working.
|
||||
|
||||
- Fixed an issue where `IdleFrameProcessor` and `UserIdleProcessor` were getting
|
||||
initialized before the start of the pipeline.
|
||||
|
||||
## [0.0.52] - 2024-12-24
|
||||
|
||||
### Added
|
||||
|
||||
- Constructor arguments for GoogleLLMService to directly set tools and tool_config.
|
||||
|
||||
- Smart turn detection example (`22d-natural-conversation-gemini-audio.py`) that
|
||||
leverages Gemini 2.0 capabilities.
|
||||
(see https://x.com/kwindla/status/1870974144831275410)
|
||||
|
||||
- Added `DailyTransport.send_dtmf()` to send dial-out DTMF tones.
|
||||
|
||||
- Added `DailyTransport.sip_call_transfer()` to forward SIP and PSTN calls to
|
||||
another address or number. For example, transfer a SIP call to a different
|
||||
SIP address or transfer a PSTN phone number to a different PSTN phone number.
|
||||
|
||||
- Added `DailyTransport.sip_refer()` to transfer incoming SIP/PSTN calls from
|
||||
outside Daily to another SIP/PSTN address.
|
||||
|
||||
- Added an `auto_mode` input parameter to `ElevenLabsTTSService`. `auto_mode`
|
||||
is set to `True` by default. Enabling this setting disables the chunk
|
||||
schedule and all buffers, which reduces latency.
|
||||
|
||||
- Added `KoalaFilter` which implements on-device noise reduction using Koala
|
||||
Noise Suppression.
|
||||
(see https://picovoice.ai/platform/koala/)
|
||||
|
||||
- Added `CerebrasLLMService` for Cerebras integration with an OpenAI-compatible
|
||||
interface. Added foundational example `14k-function-calling-cerebras.py`.
|
||||
|
||||
- Pipecat now supports Python 3.13. We had a dependency on the `audioop` package
|
||||
which was deprecated and now removed on Python 3.13. We are now using
|
||||
`audioop-lts` (https://github.com/AbstractUmbra/audioop) to provide the same
|
||||
functionality.
|
||||
|
||||
- Added timestamped conversation transcript support:
|
||||
|
||||
- New `TranscriptProcessor` factory provides access to user and assistant
|
||||
transcript processors.
|
||||
- `UserTranscriptProcessor` processes user speech with timestamps from
|
||||
transcription.
|
||||
- `AssistantTranscriptProcessor` processes assistant responses with LLM
|
||||
context timestamps.
|
||||
- Messages emitted with ISO 8601 timestamps indicating when they were spoken.
|
||||
- Supports all LLM formats (OpenAI, Anthropic, Google) via standard message
|
||||
format.
|
||||
- New examples: `28a-transcription-processor-openai.py`,
|
||||
`28b-transcription-processor-anthropic.py`, and
|
||||
`28c-transcription-processor-gemini.py`.
|
||||
|
||||
- Add support for more languages to ElevenLabs (Arabic, Croatian, Filipino,
|
||||
Tamil) and PlayHT (Afrikaans, Albanian, Amharic, Arabic, Bengali, Croatian,
|
||||
Galician, Hebrew, Mandarin, Serbian, Tagalog, Urdu, Xhosa).
|
||||
|
||||
### Changed
|
||||
|
||||
- `PlayHTTTSService` uses the new v4 websocket API, which also fixes an issue
|
||||
where text inputted to the TTS didn't return audio.
|
||||
|
||||
- The default model for `ElevenLabsTTSService` is now `eleven_flash_v2_5`.
|
||||
|
||||
- `OpenAIRealtimeBetaLLMService` now takes a `model` parameter in the
|
||||
constructor.
|
||||
|
||||
- Updated the default model for the `OpenAIRealtimeBetaLLMService`.
|
||||
|
||||
- Room expiration (`exp`) in `DailyRoomProperties` is now optional (`None`) by
|
||||
default instead of automatically setting a 5-minute expiration time. You must
|
||||
explicitly set expiration time if desired.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `AWSTTSService` is now deprecated, use `PollyTTSService` instead.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed token counting in `GoogleLLMService`. Tokens were summed incorrectly
|
||||
(double-counted in many cases).
|
||||
|
||||
- Fixed an issue that could cause the bot to stop talking if there was a user
|
||||
interruption before getting any audio from the TTS service.
|
||||
|
||||
- Fixed an issue that would cause `ParallelPipeline` to handle `EndFrame`
|
||||
incorrectly causing the main pipeline to not terminate or terminate too early.
|
||||
|
||||
- Fixed an audio stuttering issue in `FastPitchTTSService`.
|
||||
|
||||
- Fixed a `BaseOutputTransport` issue that was causing non-audio frames to be
|
||||
processed before the previous audio frames were played. This will allow, for
|
||||
example, sending a frame `A` after a `TTSSpeakFrame` and the frame `A` will
|
||||
only be pushed downstream after the audio generated from `TTSSpeakFrame` has
|
||||
been spoken.
|
||||
|
||||
- Fixed a `DeepgramSTTService` issue that was causing language to be passed as
|
||||
an object instead of a string, causing the connection to fail.
|
||||
|
||||
## [0.0.51] - 2024-12-16
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue in websocket-based TTS services that was causing infinite
|
||||
reconnections (Cartesia, ElevenLabs, PlayHT and LMNT).
|
||||
|
||||
## [0.0.50] - 2024-12-11
|
||||
|
||||
### Added
|
||||
|
||||
- Added `GeminiMultimodalLiveLLMService`. This is an integration for Google's
|
||||
Gemini Multimodal Live API, supporting:
|
||||
|
||||
- Real-time audio and video input processing
|
||||
- Streaming text responses with TTS
|
||||
- Audio transcription for both user and bot speech
|
||||
- Function calling
|
||||
- System instructions and context management
|
||||
- Dynamic parameter updates (temperature, top_p, etc.)
|
||||
|
||||
- Added `AudioTranscriber` utility class for handling audio transcription with
|
||||
Gemini models.
|
||||
|
||||
- Added new context classes for Gemini:
|
||||
|
||||
- `GeminiMultimodalLiveContext`
|
||||
- `GeminiMultimodalLiveUserContextAggregator`
|
||||
- `GeminiMultimodalLiveAssistantContextAggregator`
|
||||
- `GeminiMultimodalLiveContextAggregatorPair`
|
||||
|
||||
- Added new foundational examples for `GeminiMultimodalLiveLLMService`:
|
||||
|
||||
- `26-gemini-multimodal-live.py`
|
||||
- `26a-gemini-multimodal-live-transcription.py`
|
||||
- `26b-gemini-multimodal-live-video.py`
|
||||
- `26c-gemini-multimodal-live-video.py`
|
||||
|
||||
- Added `SimliVideoService`. This is an integration for Simli AI avatars.
|
||||
(see https://www.simli.com)
|
||||
|
||||
- Added NVIDIA Riva's `FastPitchTTSService` and `ParakeetSTTService`.
|
||||
(see https://www.nvidia.com/en-us/ai-data-science/products/riva/)
|
||||
|
||||
- Added `IdentityFilter`. This is the simplest frame filter that lets through
|
||||
all incoming frames.
|
||||
|
||||
- New `STTMuteStrategy` called `FUNCTION_CALL` which mutes the STT service
|
||||
during LLM function calls.
|
||||
|
||||
- `DeepgramSTTService` now exposes two event handlers `on_speech_started` and
|
||||
`on_utterance_end` that could be used to implement interruptions. See new
|
||||
example `examples/foundational/07c-interruptible-deepgram-vad.py`.
|
||||
|
||||
- Added `GroqLLMService`, `GrokLLMService`, and `NimLLMService` for Groq, Grok,
|
||||
and NVIDIA NIM API integration, with an OpenAI-compatible interface.
|
||||
|
||||
- New examples demonstrating function calling with Groq, Grok, Azure OpenAI,
|
||||
Fireworks, and NVIDIA NIM: `14f-function-calling-groq.py`,
|
||||
`14g-function-calling-grok.py`, `14h-function-calling-azure.py`,
|
||||
`14i-function-calling-fireworks.py`, and `14j-function-calling-nvidia.py`.
|
||||
|
||||
- In order to obtain the audio stored by the `AudioBufferProcessor` you can now
|
||||
also register an `on_audio_data` event handler. The `on_audio_data` handler
|
||||
will be called every time `buffer_size` (a new constructor argument) is
|
||||
reached. If `buffer_size` is 0 (default) you need to manually get the audio as
|
||||
before using `AudioBufferProcessor.merge_audio_buffers()`.
|
||||
|
||||
```
|
||||
@audiobuffer.event_handler("on_audio_data")
|
||||
async def on_audio_data(processor, audio, sample_rate, num_channels):
|
||||
await save_audio(audio, sample_rate, num_channels)
|
||||
```
|
||||
|
||||
- Added a new RTVI message called `disconnect-bot`, which when handled pushes
|
||||
an `EndFrame` to trigger the pipeline to stop.
|
||||
|
||||
### Changed
|
||||
|
||||
- `STTMuteFilter` now supports multiple simultaneous muting strategies.
|
||||
|
||||
- `XTTSService` language now defaults to `Language.EN`.
|
||||
|
||||
- `SoundfileMixer` doesn't resample input files anymore to avoid startup
|
||||
delays. The sample rate of the provided sound files now needs to match the
|
||||
sample rate of the output transport.
|
||||
|
||||
- Input frames (audio, image and transport messages) are now system frames. This
|
||||
means they are processed immediately by all processors instead of being queued
|
||||
internally.
|
||||
|
||||
- Expanded the transcriptions.language module to support a superset of
|
||||
languages.
|
||||
|
||||
- Updated STT and TTS services with language options that match the supported
|
||||
languages for each service.
|
||||
|
||||
- Updated the `AzureLLMService` to use the `OpenAILLMService`. Updated the
|
||||
`api_version` to `2024-09-01-preview`.
|
||||
|
||||
- Updated the `FireworksLLMService` to use the `OpenAILLMService`. Updated the
|
||||
default model to `accounts/fireworks/models/firefunction-v2`.
|
||||
|
||||
- Updated the `simple-chatbot` example to include a JavaScript and React client
|
||||
example, using RTVI JS and React.
|
||||
|
||||
### Removed
|
||||
|
||||
- Removed `AppFrame`. This was used as a special user custom frame, but there's
|
||||
actually no use case for that.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed a `ParallelPipeline` issue that would cause system frames to be queued.
|
||||
|
||||
- Fixed `FastAPIWebsocketTransport` so it can work with binary data (e.g. using
|
||||
the protobuf serializer).
|
||||
|
||||
- Fixed an issue in `CartesiaTTSService` that could cause previous audio to be
|
||||
received after an interruption.
|
||||
|
||||
- Fixed Cartesia, ElevenLabs, LMNT and PlayHT TTS websocket
|
||||
reconnection. Before, if an error occurred no reconnection was happening.
|
||||
|
||||
- Fixed a `BaseOutputTransport` issue that was causing audio to be discarded
|
||||
after an `EndFrame` was received.
|
||||
|
||||
- Fixed an issue in `WebsocketServerTransport` and `FastAPIWebsocketTransport`
|
||||
that would cause a busy loop when using audio mixer.
|
||||
|
||||
- Fixed a `DailyTransport` and `LiveKitTransport` issue where connections were
|
||||
being closed in the input transport prematurely. This was causing frames
|
||||
queued inside the pipeline to be discarded.
|
||||
|
||||
- Fixed an issue in `DailyTransport` that would cause some internal callbacks to
|
||||
not be executed.
|
||||
|
||||
- Fixed an issue where other frames were being processed while a `CancelFrame`
|
||||
was being pushed down the pipeline.
|
||||
|
||||
- `AudioBufferProcessor` now handles interruptions properly.
|
||||
|
||||
- Fixed a `WebsocketServerTransport` issue that would prevent interruptions with
|
||||
`TwilioSerializer` from working.
|
||||
|
||||
- `DailyTransport.capture_participant_video` now allows capturing user's screen
|
||||
share by simply passing `video_source="screenVideo"`.
|
||||
|
||||
- Fixed Google Gemini message handling to properly convert appended messages to
|
||||
Gemini's required format.
|
||||
|
||||
- Fixed an issue with `FireworksLLMService` where chat completions were failing
|
||||
by removing the `stream_options` from the chat completion options.
|
||||
|
||||
## [0.0.49] - 2024-11-17
|
||||
|
||||
### Added
|
||||
|
||||
- Added RTVI `on_bot_started` event which is useful in a single turn
|
||||
interaction.
|
||||
|
||||
- Added `DailyTransport` events `dialin-connected`, `dialin-stopped`,
|
||||
`dialin-error` and `dialin-warning`. Needs daily-python >= 0.13.0.
|
||||
|
||||
- Added `RimeHttpTTSService` and the `07q-interruptible-rime.py` foundational
|
||||
example.
|
||||
|
||||
- Added `STTMuteFilter`, a general-purpose processor that combines STT
|
||||
muting and interruption control. When active, it prevents both transcription
|
||||
and interruptions during bot speech. The processor supports multiple
|
||||
strategies: `FIRST_SPEECH` (mute only during bot's first
|
||||
speech), `ALWAYS` (mute during all bot speech), or `CUSTOM` (using provided
|
||||
callback).
|
||||
|
||||
- Added `STTMuteFrame`, a control frame that enables/disables speech
|
||||
transcription in STT services.
|
||||
|
||||
## [0.0.48] - 2024-11-10 "Antonio release"
|
||||
|
||||
### Added
|
||||
|
||||
- There's now an input queue in each frame processor. When you call
|
||||
`FrameProcessor.push_frame()` this will internally call
|
||||
`FrameProcessor.queue_frame()` on the next processor (upstream or downstream)
|
||||
and the frame will be internally queued (except system frames). Then, the
|
||||
queued frames will get processed. With this input queue it is also possible
|
||||
for FrameProcessors to block processing more frames by calling
|
||||
`FrameProcessor.pause_processing_frames()`. The way to resume processing
|
||||
frames is by calling `FrameProcessor.resume_processing_frames()`.
|
||||
|
||||
- Added audio filter `NoisereduceFilter`.
|
||||
|
||||
- Introduce input transport audio filters (`BaseAudioFilter`). Audio filters can
|
||||
be used to remove background noises before audio is sent to VAD.
|
||||
|
||||
- Introduce output transport audio mixers (`BaseAudioMixer`). Output transport
|
||||
audio mixers can be used, for example, to add background sounds or any other
|
||||
audio mixing functionality before the output audio is actually written to the
|
||||
transport.
|
||||
|
||||
- Added `GatedOpenAILLMContextAggregator`. This aggregator keeps the last
|
||||
received OpenAI LLM context frame and it doesn't let it through until the
|
||||
notifier is notified.
|
||||
|
||||
- Added `WakeNotifierFilter`. This processor expects a list of frame types and
|
||||
will execute a given callback predicate when a frame of any of those types is
|
||||
being processed. If the callback returns true the notifier will be notified.
|
||||
|
||||
- Added `NullFilter`. A null filter doesn't push any frames upstream or
|
||||
downstream. This is usually used to disable one of the pipelines in
|
||||
`ParallelPipeline`.
|
||||
|
||||
- Added `EventNotifier`. This can be used as a very simple synchronization
|
||||
feature between processors.
|
||||
|
||||
- Added `TavusVideoService`. This is an integration for Tavus digital twins.
|
||||
(see https://www.tavus.io/)
|
||||
|
||||
- Added `DailyTransport.update_subscriptions()`. This allows you to have fine
|
||||
grained control of what media subscriptions you want for each participant in a
|
||||
room.
|
||||
|
||||
- Added audio filter `KrispFilter`.
|
||||
|
||||
### Changed
|
||||
|
||||
- The following `DailyTransport` functions are now `async` which means they need
|
||||
to be awaited: `start_dialout`, `stop_dialout`, `start_recording`,
|
||||
`stop_recording`, `capture_participant_transcription` and
|
||||
`capture_participant_video`.
|
||||
|
||||
- Changed default output sample rate to 24000. This changes all TTS services to
|
||||
output to 24000 and also the default output transport sample rate. This
|
||||
improves audio quality at the cost of some extra bandwidth.
|
||||
|
||||
- `AzureTTSService` now uses Azure websockets instead of HTTP requests.
|
||||
|
||||
- The previous `AzureTTSService` HTTP implementation is now
|
||||
`AzureHttpTTSService`.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Websocket transports (FastAPI and Websocket) now synchronize with time before
|
||||
sending data. This allows for interruptions to just work out of the box.
|
||||
|
||||
- Improved bot speaking detection for all TTS services by using actual bot
|
||||
audio.
|
||||
|
||||
- Fixed an issue that was generating constant bot started/stopped speaking
|
||||
frames for HTTP TTS services.
|
||||
|
||||
- Fixed an issue that was causing stuttering with AWS TTS service.
|
||||
|
||||
- Fixed an issue with PlayHTTTSService, where the TTFB metrics were reporting
|
||||
very small time values.
|
||||
|
||||
- Fixed an issue where AzureTTSService wasn't initializing the specified
|
||||
language.
|
||||
|
||||
### Other
|
||||
|
||||
- Add `23-bot-background-sound.py` foundational example.
|
||||
|
||||
- Added a new foundational example `22-natural-conversation.py`. This example
|
||||
shows how to achieve a more natural conversation detecting when the user ends
|
||||
a statement.
|
||||
|
||||
## [0.0.47] - 2024-10-22
|
||||
|
||||
### Added
|
||||
|
||||
- Added `AssemblyAISTTService` and corresponding foundational examples
|
||||
`07o-interruptible-assemblyai.py` and `13d-assemblyai-transcription.py`.
|
||||
|
||||
- Added a foundational example for Gladia transcription:
|
||||
`13c-gladia-transcription.py`
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated `GladiaSTTService` to use the V2 API.
|
||||
|
||||
- Changed `DailyTransport` transcription model to `nova-2-general`.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue that would cause an import error when importing
|
||||
`SileroVADAnalyzer` from the old package `pipecat.vad.silero`.
|
||||
|
||||
- Fixed `enable_usage_metrics` to control LLM/TTS usage metrics separately
|
||||
from `enable_metrics`.
|
||||
|
||||
## [0.0.46] - 2024-10-19
|
||||
|
||||
### Added
|
||||
|
||||
- Added `audio_passthrough` parameter to `STTService`. If enabled it allows
|
||||
audio frames to be pushed downstream in case other processors need them.
|
||||
|
||||
- Added input parameter options for `PlayHTTTSService` and
|
||||
`PlayHTHttpTTSService`.
|
||||
|
||||
### Changed
|
||||
|
||||
- Changed `DeepgramSTTService` model to `nova-2-general`.
|
||||
|
||||
- Moved `SileroVAD` audio processor to `processors.audio.vad`.
|
||||
|
||||
- Module `utils.audio` is now `audio.utils`. A new `resample_audio` function has
|
||||
been added.
|
||||
|
||||
- `PlayHTTTSService` now uses PlayHT websockets instead of HTTP requests.
|
||||
|
||||
- The previous `PlayHTTTSService` HTTP implementation is now
|
||||
`PlayHTHttpTTSService`.
|
||||
|
||||
- `PlayHTTTSService` and `PlayHTHttpTTSService` now use a `voice_engine` of
|
||||
`PlayHT3.0-mini`, which allows for multi-lingual support.
|
||||
|
||||
- Renamed `OpenAILLMServiceRealtimeBeta` to `OpenAIRealtimeBetaLLMService` to
|
||||
match other services.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `LLMUserResponseAggregator` and `LLMAssistantResponseAggregator` are
|
||||
mostly deprecated, use `OpenAILLMContext` instead.
|
||||
|
||||
- The `vad` package is now deprecated and `audio.vad` should be used
|
||||
instead. The `vad` package will get removed in a future release.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue that would cause an error if no VAD analyzer was passed to
|
||||
`LiveKitTransport` params.
|
||||
|
||||
- Fixed `SileroVAD` processor to support interruptions properly.
|
||||
|
||||
### Other
|
||||
|
||||
- Added `examples/foundational/07-interruptible-vad.py`. This is the same as
|
||||
`07-interruptible.py` but using the `SileroVAD` processor instead of passing
|
||||
the `VADAnalyzer` in the transport.
|
||||
|
||||
## [0.0.45] - 2024-10-16
|
||||
|
||||
### Changed
|
||||
|
||||
- Metrics messages have moved out from the transport's base output into RTVI.
|
||||
|
||||
## [0.0.44] - 2024-10-15
|
||||
|
||||
### Added
|
||||
|
||||
165
CONTRIBUTING.md
Normal file
165
CONTRIBUTING.md
Normal file
@@ -0,0 +1,165 @@
|
||||
## Contributing to Pipecat
|
||||
|
||||
We welcome contributions of all kinds! Your help is appreciated. Follow these steps to get involved:
|
||||
|
||||
1. **Fork this repository**: Start by forking the Pipecat repository to your GitHub account.
|
||||
|
||||
2. **Clone the repository**: Clone your forked repository to your local machine.
|
||||
```bash
|
||||
git clone https://github.com/your-username/pipecat
|
||||
```
|
||||
3. **Create a branch**: For your contribution, create a new branch.
|
||||
```bash
|
||||
git checkout -b your-branch-name
|
||||
```
|
||||
4. **Make your changes**: Edit or add files as necessary.
|
||||
5. **Test your changes**: Ensure that your changes look correct and follow the style set in the codebase.
|
||||
6. **Commit your changes**: Once you're satisfied with your changes, commit them with a meaningful message.
|
||||
|
||||
```bash
|
||||
git commit -m "Description of your changes"
|
||||
```
|
||||
|
||||
7. **Push your changes**: Push your branch to your forked repository.
|
||||
|
||||
```bash
|
||||
git push origin your-branch-name
|
||||
```
|
||||
|
||||
8. **Submit a Pull Request (PR)**: Open a PR from your forked repository to the main branch of this repo.
|
||||
> Important: Describe the changes you've made clearly!
|
||||
|
||||
Our maintainers will review your PR, and once everything is good, your contributions will be merged!
|
||||
|
||||
|
||||
# Contributor Covenant Code of Conduct
|
||||
|
||||
## Our Pledge
|
||||
|
||||
We as members, contributors, and leaders pledge to make participation in our
|
||||
community a harassment-free experience for everyone, regardless of age, body
|
||||
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
||||
identity and expression, level of experience, education, socio-economic status,
|
||||
nationality, personal appearance, race, caste, color, religion, or sexual
|
||||
identity and orientation.
|
||||
|
||||
We pledge to act and interact in ways that contribute to an open, welcoming,
|
||||
diverse, inclusive, and healthy community.
|
||||
|
||||
## Our Standards
|
||||
|
||||
Examples of behavior that contributes to a positive environment for our
|
||||
community include:
|
||||
|
||||
* Demonstrating empathy and kindness toward other people
|
||||
* Being respectful of differing opinions, viewpoints, and experiences
|
||||
* Giving and gracefully accepting constructive feedback
|
||||
* Accepting responsibility and apologizing to those affected by our mistakes,
|
||||
and learning from the experience
|
||||
* Focusing on what is best not just for us as individuals, but for the overall
|
||||
community
|
||||
|
||||
Examples of unacceptable behavior include:
|
||||
|
||||
* The use of sexualized language or imagery, and sexual attention or advances of
|
||||
any kind
|
||||
* Trolling, insulting or derogatory comments, and personal or political attacks
|
||||
* Public or private harassment
|
||||
* Publishing others' private information, such as a physical or email address,
|
||||
without their explicit permission
|
||||
* Other conduct which could reasonably be considered inappropriate in a
|
||||
professional setting
|
||||
|
||||
## Enforcement Responsibilities
|
||||
|
||||
Community leaders are responsible for clarifying and enforcing our standards of
|
||||
acceptable behavior and will take appropriate and fair corrective action in
|
||||
response to any behavior that they deem inappropriate, threatening, offensive,
|
||||
or harmful.
|
||||
|
||||
Community leaders have the right and responsibility to remove, edit, or reject
|
||||
comments, commits, code, wiki edits, issues, and other contributions that are
|
||||
not aligned to this Code of Conduct, and will communicate reasons for moderation
|
||||
decisions when appropriate.
|
||||
|
||||
## Scope
|
||||
|
||||
This Code of Conduct applies within all community spaces, and also applies when
|
||||
an individual is officially representing the community in public spaces.
|
||||
Examples of representing our community include using an official email address,
|
||||
posting via an official social media account, or acting as an appointed
|
||||
representative at an online or offline event.
|
||||
|
||||
## Enforcement
|
||||
|
||||
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
||||
reported to the community leaders responsible for enforcement at pipecat-ai@daily.co.
|
||||
All complaints will be reviewed and investigated promptly and fairly.
|
||||
|
||||
All community leaders are obligated to respect the privacy and security of the
|
||||
reporter of any incident.
|
||||
|
||||
## Enforcement Guidelines
|
||||
|
||||
Community leaders will follow these Community Impact Guidelines in determining
|
||||
the consequences for any action they deem in violation of this Code of Conduct:
|
||||
|
||||
### 1. Correction
|
||||
|
||||
**Community Impact**: Use of inappropriate language or other behavior deemed
|
||||
unprofessional or unwelcome in the community.
|
||||
|
||||
**Consequence**: A private, written warning from community leaders, providing
|
||||
clarity around the nature of the violation and an explanation of why the
|
||||
behavior was inappropriate. A public apology may be requested.
|
||||
|
||||
### 2. Warning
|
||||
|
||||
**Community Impact**: A violation through a single incident or series of
|
||||
actions.
|
||||
|
||||
**Consequence**: A warning with consequences for continued behavior. No
|
||||
interaction with the people involved, including unsolicited interaction with
|
||||
those enforcing the Code of Conduct, for a specified period of time. This
|
||||
includes avoiding interactions in community spaces as well as external channels
|
||||
like social media. Violating these terms may lead to a temporary or permanent
|
||||
ban.
|
||||
|
||||
### 3. Temporary Ban
|
||||
|
||||
**Community Impact**: A serious violation of community standards, including
|
||||
sustained inappropriate behavior.
|
||||
|
||||
**Consequence**: A temporary ban from any sort of interaction or public
|
||||
communication with the community for a specified period of time. No public or
|
||||
private interaction with the people involved, including unsolicited interaction
|
||||
with those enforcing the Code of Conduct, is allowed during this period.
|
||||
Violating these terms may lead to a permanent ban.
|
||||
|
||||
### 4. Permanent Ban
|
||||
|
||||
**Community Impact**: Demonstrating a pattern of violation of community
|
||||
standards, including sustained inappropriate behavior, harassment of an
|
||||
individual, or aggression toward or disparagement of classes of individuals.
|
||||
|
||||
**Consequence**: A permanent ban from any sort of public interaction within the
|
||||
community.
|
||||
|
||||
## Attribution
|
||||
|
||||
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
||||
version 2.1, available at
|
||||
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
|
||||
|
||||
Community Impact Guidelines were inspired by
|
||||
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
|
||||
|
||||
For answers to common questions about this code of conduct, see the FAQ at
|
||||
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
|
||||
[https://www.contributor-covenant.org/translations][translations].
|
||||
|
||||
[homepage]: https://www.contributor-covenant.org
|
||||
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
|
||||
[Mozilla CoC]: https://github.com/mozilla/diversity
|
||||
[FAQ]: https://www.contributor-covenant.org/faq
|
||||
[translations]: https://www.contributor-covenant.org/translations
|
||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
||||
BSD 2-Clause License
|
||||
|
||||
Copyright (c) 2024, Daily
|
||||
Copyright (c) 2024–2025, Daily
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
181
README.md
181
README.md
@@ -1,14 +1,21 @@
|
||||
<div align="center">
|
||||
<h1><div align="center">
|
||||
<img alt="pipecat" width="300px" height="auto" src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/pipecat.png">
|
||||
</div>
|
||||
</div></h1>
|
||||
|
||||
# Pipecat
|
||||
[](https://pypi.org/project/pipecat-ai) [](https://docs.pipecat.ai) [](https://discord.gg/pipecat) <a href="https://app.commanddash.io/agent/github_pipecat-ai_pipecat"><img src="https://img.shields.io/badge/AI-Code%20Agent-EB9FDA"></a>
|
||||
|
||||
[PyPI](https://pypi.org/project/pipecat-ai) [Discord](https://discord.gg/pipecat) <a href="https://app.commanddash.io/agent/github_pipecat-ai_pipecat"><img src="https://img.shields.io/badge/AI-Code%20Agent-EB9FDA"></a>
|
||||
Pipecat is an open source Python framework for building voice and multimodal conversational agents. It handles the complex orchestration of AI services, network transport, audio processing, and multimodal interactions, letting you focus on creating engaging experiences.
|
||||
|
||||
`pipecat` is a framework for building voice (and multimodal) conversational agents. Things like personal coaches, meeting assistants, [story-telling toys for kids](https://storytelling-chatbot.fly.dev/), customer support bots, [intake flows](https://www.youtube.com/watch?v=lDevgsp9vn0), and snarky social companions.
|
||||
## What you can build
|
||||
|
||||
Take a look at some example apps:
|
||||
- **Voice Assistants**: [Natural, real-time conversations with AI](https://demo.dailybots.ai/)
|
||||
- **Interactive Agents**: Personal coaches and meeting assistants
|
||||
- **Multimodal Apps**: Combine voice, video, images, and text
|
||||
- **Creative Tools**: [Story-telling experiences](https://storytelling-chatbot.fly.dev/) and social companions
|
||||
- **Business Solutions**: [Customer intake flows](https://www.youtube.com/watch?v=lDevgsp9vn0) and support bots
|
||||
- **Complex conversational flows**: [Refer to Pipecat Flows](https://github.com/pipecat-ai/pipecat-flows) to learn more
|
||||
|
||||
## See it in action
|
||||
|
||||
<p float="left">
|
||||
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/simple-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/simple-chatbot/image.png" width="280" /></a>
|
||||
@@ -18,43 +25,61 @@ Take a look at some example apps:
|
||||
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/moondream-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/moondream-chatbot/image.png" width="280" /></a>
|
||||
</p>
|
||||
|
||||
## Getting started with voice agents
|
||||
## Key features
|
||||
|
||||
- **Voice-first Design**: Built-in speech recognition, TTS, and conversation handling
|
||||
- **Flexible Integration**: Works with popular AI services (OpenAI, ElevenLabs, etc.)
|
||||
- **Pipeline Architecture**: Build complex apps from simple, reusable components
|
||||
- **Real-time Processing**: Frame-based pipeline architecture for fluid interactions
|
||||
- **Production Ready**: Enterprise-grade WebRTC and Websocket support
|
||||
|
||||
💡 Looking to build structured conversations? Check out [Pipecat Flows](https://github.com/pipecat-ai/pipecat-flows) for managing complex conversational states and transitions.
|
||||
|
||||
## Getting started
|
||||
|
||||
You can get started with Pipecat running on your local machine, then move your agent processes to the cloud when you’re ready. You can also add a 📞 telephone number, 🖼️ image output, 📺 video input, use different LLMs, and more.
|
||||
|
||||
```shell
|
||||
# install the module
|
||||
# Install the module
|
||||
pip install pipecat-ai
|
||||
|
||||
# set up an .env file with API keys
|
||||
# Set up your environment
|
||||
cp dot-env.template .env
|
||||
```
|
||||
|
||||
By default, in order to minimize dependencies, only the basic framework functionality is available. Some third-party AI services require additional dependencies that you can install with:
|
||||
To keep things lightweight, only the core framework is included by default. If you need support for third-party AI services, you can add the necessary dependencies with:
|
||||
|
||||
```shell
|
||||
pip install "pipecat-ai[option,...]"
|
||||
```
|
||||
|
||||
Your project may or may not need these, so they're made available as optional requirements. Here is a list:
|
||||
Available options include:
|
||||
|
||||
- **AI services**: `anthropic`, `aws`, `azure`, `deepgram`, `gladia`, `google`, `fal`, `lmnt`, `moondream`, `openai`, `openpipe`, `playht`, `silero`, `whisper`, `xtts`
|
||||
- **Transports**: `local`, `websocket`, `daily`
|
||||
| Category | Services | Install Command Example |
|
||||
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) | `pip install "pipecat-ai[deepgram]"` |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Together AI](https://docs.pipecat.ai/server/services/llm/together) | `pip install "pipecat-ai[openai]"` |
|
||||
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) | `pip install "pipecat-ai[cartesia]"` |
|
||||
| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) | `pip install "pipecat-ai[openai]"` |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local | `pip install "pipecat-ai[daily]"` |
|
||||
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) | `pip install "pipecat-ai[tavus,simli]"` |
|
||||
| Vision & Image | [Moondream](https://docs.pipecat.ai/server/services/vision/moondream), [fal](https://docs.pipecat.ai/server/services/image-generation/fal) | `pip install "pipecat-ai[moondream]"` |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) | `pip install "pipecat-ai[silero]"` |
|
||||
| Analytics & Metrics | [Canonical AI](https://docs.pipecat.ai/server/services/analytics/canonical), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) | `pip install "pipecat-ai[canonical]"` |
|
||||
|
||||
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
|
||||
|
||||
## Code examples
|
||||
|
||||
- [foundational](https://github.com/pipecat-ai/pipecat/tree/main/examples/foundational) — small snippets that build on each other, introducing one or two concepts at a time
|
||||
- [example apps](https://github.com/pipecat-ai/pipecat/tree/main/examples/) — complete applications that you can use as starting points for development
|
||||
- [Foundational](https://github.com/pipecat-ai/pipecat/tree/main/examples/foundational) — small snippets that build on each other, introducing one or two concepts at a time
|
||||
- [Example apps](https://github.com/pipecat-ai/pipecat/tree/main/examples/) — complete applications that you can use as starting points for development
|
||||
|
||||
## A simple voice agent running locally
|
||||
|
||||
Here is a very basic Pipecat bot that greets a user when they join a real-time session. We'll use [Daily](https://daily.co) for real-time media transport, and [Cartesia](https://cartesia.ai/) for text-to-speech.
|
||||
|
||||
```python
|
||||
#app.py
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
|
||||
from pipecat.frames.frames import EndFrame, TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -64,39 +89,43 @@ from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
# Use Daily as a real-time media transport (WebRTC)
|
||||
transport = DailyTransport(
|
||||
room_url=...,
|
||||
token=...,
|
||||
bot_name="Bot Name",
|
||||
params=DailyParams(audio_out_enabled=True))
|
||||
# Use Daily as a real-time media transport (WebRTC)
|
||||
transport = DailyTransport(
|
||||
room_url=...,
|
||||
token="", # leave empty. Note: token is _not_ your api key
|
||||
bot_name="Bot Name",
|
||||
params=DailyParams(audio_out_enabled=True))
|
||||
|
||||
# Use Cartesia for Text-to-Speech
|
||||
tts = CartesiaTTSService(
|
||||
api_key=...,
|
||||
voice_id=...
|
||||
)
|
||||
# Use Cartesia for Text-to-Speech
|
||||
tts = CartesiaTTSService(
|
||||
api_key=...,
|
||||
voice_id=...
|
||||
)
|
||||
|
||||
# Simple pipeline that will process text to speech and output the result
|
||||
pipeline = Pipeline([tts, transport.output()])
|
||||
# Simple pipeline that will process text to speech and output the result
|
||||
pipeline = Pipeline([tts, transport.output()])
|
||||
|
||||
# Create Pipecat processor that can run one or more pipelines tasks
|
||||
runner = PipelineRunner()
|
||||
# Create Pipecat processor that can run one or more pipelines tasks
|
||||
runner = PipelineRunner()
|
||||
|
||||
# Assign the task callable to run the pipeline
|
||||
task = PipelineTask(pipeline)
|
||||
# Assign the task callable to run the pipeline
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
# Register an event handler to play audio when a
|
||||
# participant joins the transport WebRTC session
|
||||
@transport.event_handler("on_participant_joined")
|
||||
async def on_new_participant_joined(transport, participant):
|
||||
participant_name = participant["info"]["userName"] or ''
|
||||
# Queue a TextFrame that will get spoken by the TTS service (Cartesia)
|
||||
await task.queue_frames([TextFrame(f"Hello there, {participant_name}!"), EndFrame()])
|
||||
# Register an event handler to play audio when a
|
||||
# participant joins the transport WebRTC session
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
participant_name = participant.get("info", {}).get("userName", "")
|
||||
# Queue a TextFrame that will get spoken by the TTS service (Cartesia)
|
||||
await task.queue_frame(TextFrame(f"Hello there, {participant_name}!"))
|
||||
|
||||
# Run the pipeline task
|
||||
await runner.run(task)
|
||||
# Register an event handler to exit the application when the user leaves.
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
# Run the pipeline task
|
||||
await runner.run(task)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -108,7 +137,7 @@ Run it with:
|
||||
python app.py
|
||||
```
|
||||
|
||||
Daily provides a prebuilt WebRTC user interface. Whilst the app is running, you can visit at `https://<yourdomain>.daily.co/<room_url>` and listen to the bot say hello!
|
||||
Daily provides a prebuilt WebRTC user interface. While the app is running, you can visit `https://<yourdomain>.daily.co/<room_url>` and listen to the bot say hello!
|
||||
|
||||
## WebRTC for production use
|
||||
|
||||
@@ -118,16 +147,6 @@ One way to get up and running quickly with WebRTC is to sign up for a Daily deve
|
||||
|
||||
Sign up [here](https://dashboard.daily.co/u/signup) and [create a room](https://docs.daily.co/reference/rest-api/rooms) in the developer Dashboard.
|
||||
|
||||
## What is VAD?
|
||||
|
||||
Voice Activity Detection — very important for knowing when a user has finished speaking to your bot. If you are not using press-to-talk, and want Pipecat to detect when the user has finished talking, VAD is an essential component for a natural feeling conversation.
|
||||
|
||||
Pipecat makes use of WebRTC VAD by default when using a WebRTC transport layer. Optionally, you can use Silero VAD for improved accuracy at the cost of higher CPU usage.
|
||||
|
||||
```shell
|
||||
pip install pipecat-ai[silero]
|
||||
```
|
||||
|
||||
## Hacking on the framework itself
|
||||
|
||||
_Note that you may need to set up a virtual environment before following the instructions below. For instance, you might need to run the following from the root of the repo:_
|
||||
@@ -141,15 +160,24 @@ From the root of this repo, run the following:
|
||||
|
||||
```shell
|
||||
pip install -r dev-requirements.txt
|
||||
python -m build
|
||||
```
|
||||
|
||||
This builds the package. To use the package locally (e.g. to run sample files), run
|
||||
This will install the necessary development dependencies. Also, make sure you install the git pre-commit hooks:
|
||||
|
||||
```shell
|
||||
pre-commit install
|
||||
```
|
||||
|
||||
The hooks will just save you time when you submit a PR by making sure your code follows the project rules.
|
||||
|
||||
To use the package locally (e.g. to run sample files), run:
|
||||
|
||||
```shell
|
||||
pip install --editable ".[option,...]"
|
||||
```
|
||||
|
||||
The `--editable` option makes sure you don't have to run `pip install` again and you can just edit the project files locally.
|
||||
|
||||
If you want to use this package from another directory, you can run:
|
||||
|
||||
```shell
|
||||
@@ -177,10 +205,8 @@ You can use [use-package](https://github.com/jwiegley/use-package) to install [e
|
||||
:ensure t
|
||||
:hook ((python-mode . lazy-ruff-mode))
|
||||
:config
|
||||
(setq lazy-ruff-format-command "ruff format --config line-length=100")
|
||||
(setq lazy-ruff-only-format-block t)
|
||||
(setq lazy-ruff-only-format-region t)
|
||||
(setq lazy-ruff-only-format-buffer t))
|
||||
(setq lazy-ruff-format-command "ruff format")
|
||||
(setq lazy-ruff-check-command "ruff check --select I"))
|
||||
```
|
||||
|
||||
`ruff` was installed in the `venv` environment described before, so you should be able to use [pyvenv-auto](https://github.com/ryotaro612/pyvenv-auto) to automatically load that environment inside Emacs.
|
||||
@@ -190,24 +216,47 @@ You can use [use-package](https://github.com/jwiegley/use-package) to install [e
|
||||
:ensure t
|
||||
:defer t
|
||||
:hook ((python-mode . pyvenv-auto-run)))
|
||||
|
||||
```
|
||||
|
||||
### Visual Studio Code
|
||||
|
||||
Install the
|
||||
[Ruff](https://marketplace.visualstudio.com/items?itemName=charliermarsh.ruff) extension. Then edit the user settings (_Ctrl-Shift-P_ `Open User Settings (JSON)`) and set it as the default Python formatter, enable formatting on save and configure `ruff` arguments:
|
||||
[Ruff](https://marketplace.visualstudio.com/items?itemName=charliermarsh.ruff) extension. Then edit the user settings (_Ctrl-Shift-P_ `Open User Settings (JSON)`) and set it as the default Python formatter, and enable formatting on save:
|
||||
|
||||
```json
|
||||
"[python]": {
|
||||
"editor.defaultFormatter": "charliermarsh.ruff",
|
||||
"editor.formatOnSave": true
|
||||
},
|
||||
"ruff.format.args": ["--config", "line-length=100"]
|
||||
}
|
||||
```
|
||||
|
||||
### PyCharm
|
||||
|
||||
`ruff` was installed in the `venv` environment described before. To enable autoformatting on save, go to `File` -> `Settings` -> `Tools` -> `File Watchers` and add a new watcher with the following settings:
|
||||
|
||||
1. **Name**: `Ruff formatter`
|
||||
2. **File type**: `Python`
|
||||
3. **Working directory**: `$ContentRoot$`
|
||||
4. **Arguments**: `format $FilePath$`
|
||||
5. **Program**: `$PyInterpreterDirectory$/ruff`
|
||||
|
||||
## Contributing
|
||||
|
||||
We welcome contributions from the community! Whether you're fixing bugs, improving documentation, or adding new features, here's how you can help:
|
||||
|
||||
- **Found a bug?** Open an [issue](https://github.com/pipecat-ai/pipecat/issues)
|
||||
- **Have a feature idea?** Start a [discussion](https://discord.gg/pipecat)
|
||||
- **Want to contribute code?** Check our [CONTRIBUTING.md](CONTRIBUTING.md) guide
|
||||
- **Documentation improvements?** [Docs](https://github.com/pipecat-ai/docs) PRs are always welcome
|
||||
|
||||
Before submitting a pull request, please check existing issues and PRs to avoid duplicates.
|
||||
|
||||
We aim to review all contributions promptly and provide constructive feedback to help get your changes merged.
|
||||
|
||||
## Getting help
|
||||
|
||||
➡️ [Join our Discord](https://discord.gg/pipecat)
|
||||
|
||||
➡️ [Read the docs](https://docs.pipecat.ai)
|
||||
|
||||
➡️ [Reach us on X](https://x.com/pipecat_ai)
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
build~=1.2.1
|
||||
grpcio-tools~=1.62.2
|
||||
build~=1.2.2
|
||||
grpcio-tools~=1.69.0
|
||||
pip-tools~=7.4.1
|
||||
pyright~=1.1.376
|
||||
pytest~=8.3.2
|
||||
ruff~=0.6.7
|
||||
setuptools~=72.2.0
|
||||
pre-commit~=4.0.1
|
||||
pyright~=1.1.392
|
||||
pytest~=8.3.4
|
||||
ruff~=0.9.1
|
||||
setuptools~=75.8.0
|
||||
setuptools_scm~=8.1.0
|
||||
python-dotenv~=1.0.1
|
||||
|
||||
22
docs/ISSUE_TEMPLATE.md
Normal file
22
docs/ISSUE_TEMPLATE.md
Normal file
@@ -0,0 +1,22 @@
|
||||
# Description
|
||||
Is this reporting a bug or feature request?
|
||||
|
||||
|
||||
If reporting a bug, please fill out the following:
|
||||
|
||||
### Environment
|
||||
- pipecat-ai version:
|
||||
- python version:
|
||||
- OS:
|
||||
|
||||
### Issue description
|
||||
Provide a clear description of the issue.
|
||||
|
||||
### Repro steps
|
||||
List the steps to reproduce the issue.
|
||||
|
||||
### Expected behavior
|
||||
|
||||
### Actual behavior
|
||||
|
||||
### Logs
|
||||
1
docs/PULL_REQUEST_TEMPLATE.md
Normal file
1
docs/PULL_REQUEST_TEMPLATE.md
Normal file
@@ -0,0 +1 @@
|
||||
#### Please describe the changes in your PR. If it is addressing an issue, please reference that as well.
|
||||
20
docs/api/Makefile
Normal file
20
docs/api/Makefile
Normal file
@@ -0,0 +1,20 @@
|
||||
# Minimal makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# You can set these variables from the command line, and also
|
||||
# from the environment for the first two.
|
||||
SPHINXOPTS ?=
|
||||
SPHINXBUILD ?= sphinx-build
|
||||
SOURCEDIR = .
|
||||
BUILDDIR = _build
|
||||
|
||||
# Put it first so that "make" without argument is like "make help".
|
||||
help:
|
||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
.PHONY: help Makefile
|
||||
|
||||
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||
%: Makefile
|
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
109
docs/api/README.md
Normal file
109
docs/api/README.md
Normal file
@@ -0,0 +1,109 @@
|
||||
# Pipecat Documentation
|
||||
|
||||
This directory contains the source files for auto-generating Pipecat's server API reference documentation.
|
||||
|
||||
## Setup
|
||||
|
||||
1. Install documentation dependencies:
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
2. Make the build scripts executable:
|
||||
|
||||
```bash
|
||||
chmod +x build-docs.sh rtd-test.sh
|
||||
```
|
||||
|
||||
## Building Documentation
|
||||
|
||||
From this directory, you can build the documentation in several ways:
|
||||
|
||||
### Local Build
|
||||
|
||||
```bash
|
||||
# Using the build script (automatically opens docs when done)
|
||||
./build-docs.sh
|
||||
|
||||
# Or directly with sphinx-build
|
||||
sphinx-build -b html . _build/html -W --keep-going
|
||||
```
|
||||
|
||||
### ReadTheDocs Test Build
|
||||
|
||||
To test the documentation build process exactly as it would run on ReadTheDocs:
|
||||
|
||||
```bash
|
||||
./rtd-test.sh
|
||||
```
|
||||
|
||||
This script:
|
||||
|
||||
- Creates a fresh virtual environment
|
||||
- Installs all dependencies as specified in requirements files
|
||||
- Handles conflicting dependencies (like grpcio versions for Riva and PlayHT)
|
||||
- Builds the documentation in an isolated environment
|
||||
- Provides detailed logging of the build process
|
||||
|
||||
Use this script to verify your documentation will build correctly on ReadTheDocs before pushing changes.
|
||||
|
||||
## Viewing Documentation
|
||||
|
||||
The built documentation will be available at `_build/html/index.html`. To open:
|
||||
|
||||
```bash
|
||||
# On MacOS
|
||||
open _build/html/index.html
|
||||
|
||||
# On Linux
|
||||
xdg-open _build/html/index.html
|
||||
|
||||
# On Windows
|
||||
start _build/html/index.html
|
||||
```
|
||||
|
||||
## Directory Structure
|
||||
|
||||
```
|
||||
.
|
||||
├── api/ # Auto-generated API documentation
|
||||
├── _build/ # Built documentation
|
||||
├── _static/ # Static files (images, css, etc.)
|
||||
├── conf.py # Sphinx configuration
|
||||
├── index.rst # Main documentation entry point
|
||||
├── requirements-base.txt # Base documentation dependencies
|
||||
├── requirements-riva.txt # Riva-specific dependencies
|
||||
├── requirements-playht.txt # PlayHT-specific dependencies
|
||||
├── build-docs.sh # Local build script
|
||||
└── rtd-test.sh # ReadTheDocs test build script
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
- Documentation is auto-generated from Python docstrings
|
||||
- Service modules are automatically detected and included
|
||||
- The build process matches our ReadTheDocs configuration
|
||||
- Warnings are treated as errors (-W flag) to maintain consistency
|
||||
- The --keep-going flag ensures all errors are reported
|
||||
- Dependencies are split into multiple requirements files to handle version conflicts
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
If you encounter missing service modules:
|
||||
|
||||
1. Verify the service is installed with its extras: `pip install pipecat-ai[service-name]`
|
||||
2. Check the build logs for import errors
|
||||
3. Ensure the service module is properly initialized in the package
|
||||
4. Run `./rtd-test.sh` to test in an isolated environment matching ReadTheDocs
|
||||
|
||||
For dependency conflicts:
|
||||
|
||||
1. Check the requirements files for version specifications
|
||||
2. Use `rtd-test.sh` to verify dependency resolution
|
||||
3. Consider adding service-specific requirements files if needed
|
||||
|
||||
For more information:
|
||||
|
||||
- [ReadTheDocs Configuration](.readthedocs.yaml)
|
||||
- [Sphinx Documentation](https://www.sphinx-doc.org/)
|
||||
10
docs/api/build-docs.sh
Executable file
10
docs/api/build-docs.sh
Executable file
@@ -0,0 +1,10 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Clean previous build
|
||||
rm -rf _build
|
||||
|
||||
# Build docs matching ReadTheDocs configuration
|
||||
sphinx-build -b html -d _build/doctrees . _build/html -W --keep-going
|
||||
|
||||
# Open docs (MacOS)
|
||||
open _build/html/index.html
|
||||
252
docs/api/conf.py
Normal file
252
docs/api/conf.py
Normal file
@@ -0,0 +1,252 @@
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
||||
logger = logging.getLogger("sphinx-build")
|
||||
|
||||
# Add source directory to path
|
||||
docs_dir = Path(__file__).parent
|
||||
project_root = docs_dir.parent.parent
|
||||
sys.path.insert(0, str(project_root / "src"))
|
||||
|
||||
# Project information
|
||||
project = "pipecat-ai"
|
||||
copyright = "2024, Daily"
|
||||
author = "Daily"
|
||||
|
||||
# General configuration
|
||||
extensions = [
|
||||
"sphinx.ext.autodoc",
|
||||
"sphinx.ext.napoleon",
|
||||
"sphinx.ext.viewcode",
|
||||
"sphinx.ext.intersphinx",
|
||||
]
|
||||
|
||||
# Napoleon settings
|
||||
napoleon_google_docstring = True
|
||||
napoleon_numpy_docstring = False
|
||||
napoleon_include_init_with_doc = True
|
||||
|
||||
# AutoDoc settings
|
||||
autodoc_default_options = {
|
||||
"members": True,
|
||||
"member-order": "bysource",
|
||||
"special-members": "__init__",
|
||||
"undoc-members": True,
|
||||
"exclude-members": "__weakref__",
|
||||
"no-index": True,
|
||||
"show-inheritance": True,
|
||||
}
|
||||
|
||||
# Mock imports for optional dependencies
|
||||
autodoc_mock_imports = [
|
||||
"riva",
|
||||
"livekit",
|
||||
"pyht", # Base PlayHT package
|
||||
"pyht.async_client", # PlayHT specific imports
|
||||
"pyht.client",
|
||||
"pyht.protos",
|
||||
"pyht.protos.api_pb2",
|
||||
"pipecat_ai_playht", # PlayHT wrapper
|
||||
"anthropic",
|
||||
"assemblyai",
|
||||
"boto3",
|
||||
"azure",
|
||||
"cartesia",
|
||||
"deepgram",
|
||||
"elevenlabs",
|
||||
"fal",
|
||||
"gladia",
|
||||
"google",
|
||||
"krisp",
|
||||
"langchain",
|
||||
"lmnt",
|
||||
"noisereduce",
|
||||
"openai",
|
||||
"openpipe",
|
||||
"simli",
|
||||
"soundfile",
|
||||
# Existing mocks
|
||||
"pipecat_ai_krisp",
|
||||
"pyaudio",
|
||||
"_tkinter",
|
||||
"tkinter",
|
||||
"daily",
|
||||
"daily_python",
|
||||
"pydantic.BaseModel",
|
||||
"pydantic.Field",
|
||||
"pydantic._internal._model_construction",
|
||||
"pydantic._internal._fields",
|
||||
]
|
||||
|
||||
# HTML output settings
|
||||
html_theme = "sphinx_rtd_theme"
|
||||
html_static_path = ["_static"]
|
||||
autodoc_typehints = "description"
|
||||
html_show_sphinx = False
|
||||
|
||||
|
||||
def verify_modules():
    """Try to import each expected pipecat module and log the outcome.

    Every module is imported by its dotted name. Successes are logged at INFO
    level; an ImportError, TypeError or NameError during import is logged as a
    warning and the module is remembered. A final warning lists all modules
    that could not be imported. Nothing is returned and nothing is raised for
    an unavailable module.
    """
    # Category -> module names, or category -> {subcategory -> module names}.
    expected = {
        "services": [
            "assemblyai",
            "aws",
            "cartesia",
            "deepgram",
            "google",
            "lmnt",
            "riva",
            "simli",
        ],
        "serializers": ["livekit"],
        "vad": ["silero", "vad_analyzer"],
        "transports": {
            "services": ["daily", "livekit"],
            "local": ["audio", "tk"],
            "network": ["fastapi_websocket", "websocket_server"],
        },
    }

    def dotted_names():
        # Yield fully qualified module names in declaration order.
        for category, entries in expected.items():
            if isinstance(entries, dict):
                # Nested layout: pipecat.<category>.<subcategory>.<module>
                for subcategory, leaves in entries.items():
                    for leaf in leaves:
                        yield f"pipecat.{category}.{subcategory}.{leaf}"
            else:
                # Flat layout: pipecat.<category>.<module>
                for leaf in entries:
                    yield f"pipecat.{category}.{leaf}"

    unavailable = []
    for name in dotted_names():
        try:
            __import__(name)
            logger.info(f"Successfully imported {name}")
        except (ImportError, TypeError, NameError) as e:
            unavailable.append(name)
            logger.warning(f"Optional module not available: {name} - {str(e)}")

    if unavailable:
        logger.warning(f"Some optional modules are not available: {unavailable}")
|
||||
|
||||
|
||||
def clean_title(title: str) -> str:
    """Turn a generated module heading into a short display title.

    Only the text before the first space is kept (dropping suffixes such as
    "module" or "package"), then only the last dot-separated component. That
    component is split on underscores; each word is replaced by its known
    brand/acronym spelling when there is one and capitalized otherwise, and
    the words are joined with single spaces.

    Args:
        title: Heading text, e.g. ``"pipecat.services.openai module"``.

    Returns:
        The cleaned title, e.g. ``"OpenAI"``.
    """
    # Preferred spellings for service names and common acronyms.
    preferred = {
        "ai": "AI",
        "aws": "AWS",
        "api": "API",
        "vad": "VAD",
        "assemblyai": "AssemblyAI",
        "deepgram": "Deepgram",
        "elevenlabs": "ElevenLabs",
        "openai": "OpenAI",
        "openpipe": "OpenPipe",
        "playht": "PlayHT",
        "xtts": "XTTS",
        "lmnt": "LMNT",
    }

    head = title.partition(" ")[0]  # drop everything after the first space
    leaf = head.rpartition(".")[2]  # last component of the dotted path

    # No key above contains an underscore, so a leaf that is itself a known
    # name is a single word and is resolved by the per-word lookup below.
    return " ".join(
        preferred.get(word.lower(), word.capitalize()) for word in leaf.split("_")
    )
|
||||
|
||||
|
||||
def setup(app):
    """Regenerate the API ``.rst`` sources with sphinx-apidoc and tidy their titles.

    The ``api`` directory next to this file is deleted if present and rebuilt
    from ``src/pipecat``. Afterwards the first-line title of every generated
    ``.rst`` file is rewritten with ``clean_title``.

    Failures during generation or title clean-up are logged with a traceback
    and are not re-raised.

    Args:
        app: Object supplied by the caller; it is not used by this function.
    """
    import shutil

    from sphinx.ext.apidoc import main

    docs_dir = Path(__file__).parent
    project_root = docs_dir.parent.parent
    output_dir = str(docs_dir / "api")
    source_dir = str(project_root / "src" / "pipecat")

    # Start from a clean slate so stale pages for removed modules disappear.
    if Path(output_dir).exists():
        shutil.rmtree(output_dir)
        logger.info(f"Cleaned existing documentation in {output_dir}")

    logger.info("Generating API documentation...")
    logger.info(f"Output directory: {output_dir}")
    logger.info(f"Source directory: {source_dir}")

    excludes = [
        str(project_root / "src/pipecat/pipeline/to_be_updated"),
        str(project_root / "src/pipecat/processors/gstreamer"),
        str(project_root / "src/pipecat/services/to_be_updated"),
        str(project_root / "src/pipecat/vad"),  # deprecated
        "**/test_*.py",
        "**/tests/*.py",
    ]

    try:
        main(
            [
                "-f",  # Force overwriting of existing files
                "--no-toc",  # Don't create a table of contents file
                "--separate",  # Put documentation for each module on its own page (-e)
                "--module-first",  # Module documentation before submodule documentation (-M)
                "--implicit-namespaces",  # Handle implicit namespace packages
                "-o",
                output_dir,
                source_dir,
            ]
            + excludes
        )

        logger.info("API documentation generated successfully!")

        # Process generated RST files (recursively) to update titles
        for rst_file in Path(output_dir).glob("**/*.rst"):
            content = rst_file.read_text(encoding="utf-8")
            lines = content.split("\n")

            # A title is the first line followed by an "=" underline; skip
            # files too short to have one instead of indexing past the end.
            if len(lines) < 2 or "=" not in lines[1]:
                continue

            old_title = lines[0]
            new_title = clean_title(old_title)
            # Rewrite only the title line; other occurrences of the same text
            # (e.g. in directives) are left untouched.
            lines[0] = new_title
            rst_file.write_text("\n".join(lines), encoding="utf-8")
            logger.info(f"Updated title: {old_title} -> {new_title}")

    except Exception as e:
        # Best effort: report the failure with a traceback instead of raising.
        logger.error(f"Error generating API documentation: {e}", exc_info=True)
|
||||
|
||||
|
||||
# Run module verification
|
||||
verify_modules()
|
||||
77
docs/api/index.rst
Normal file
77
docs/api/index.rst
Normal file
@@ -0,0 +1,77 @@
|
||||
Pipecat API Reference Docs
|
||||
==========================
|
||||
|
||||
Welcome to Pipecat's API reference documentation!
|
||||
|
||||
Pipecat is an open source framework for building voice and multimodal assistants.
|
||||
It provides a flexible pipeline architecture for connecting various AI services,
|
||||
audio processing, and transport layers.
|
||||
|
||||
Quick Links
|
||||
-----------
|
||||
|
||||
* `GitHub Repository <https://github.com/pipecat-ai/pipecat>`_
|
||||
* `Website <https://pipecat.ai>`_
|
||||
|
||||
API Reference
|
||||
-------------
|
||||
|
||||
Core Components
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
* :mod:`Frames <pipecat.frames>`
|
||||
* :mod:`Processors <pipecat.processors>`
|
||||
* :mod:`Pipeline <pipecat.pipeline>`
|
||||
|
||||
Audio Processing
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
* :mod:`Audio <pipecat.audio>`
|
||||
|
||||
Services
|
||||
~~~~~~~~
|
||||
|
||||
* :mod:`Services <pipecat.services>`
|
||||
|
||||
Transport & Serialization
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* :mod:`Transports <pipecat.transports>`
|
||||
* :mod:`Local <pipecat.transports.local>`
|
||||
* :mod:`Network <pipecat.transports.network>`
|
||||
* :mod:`Services <pipecat.transports.services>`
|
||||
* :mod:`Serializers <pipecat.serializers>`
|
||||
|
||||
Utilities
|
||||
~~~~~~~~~
|
||||
|
||||
* :mod:`Clocks <pipecat.clocks>`
|
||||
* :mod:`Metrics <pipecat.metrics>`
|
||||
* :mod:`Sync <pipecat.sync>`
|
||||
* :mod:`Transcriptions <pipecat.transcriptions>`
|
||||
* :mod:`Utils <pipecat.utils>`
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 3
|
||||
:caption: API Reference
|
||||
:hidden:
|
||||
|
||||
Audio <api/pipecat.audio>
|
||||
Clocks <api/pipecat.clocks>
|
||||
Frames <api/pipecat.frames>
|
||||
Metrics <api/pipecat.metrics>
|
||||
Pipeline <api/pipecat.pipeline>
|
||||
Processors <api/pipecat.processors>
|
||||
Serializers <api/pipecat.serializers>
|
||||
Services <api/pipecat.services>
|
||||
Sync <api/pipecat.sync>
|
||||
Transcriptions <api/pipecat.transcriptions>
|
||||
Transports <api/pipecat.transports>
|
||||
Utils <api/pipecat.utils>
|
||||
|
||||
Indices and tables
|
||||
==================
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`modindex`
|
||||
* :ref:`search`
|
||||
35
docs/api/make.bat
Normal file
35
docs/api/make.bat
Normal file
@@ -0,0 +1,35 @@
|
||||
@ECHO OFF
|
||||
|
||||
pushd %~dp0
|
||||
|
||||
REM Command file for Sphinx documentation
|
||||
|
||||
if "%SPHINXBUILD%" == "" (
|
||||
set SPHINXBUILD=sphinx-build
|
||||
)
|
||||
set SOURCEDIR=.
|
||||
set BUILDDIR=_build
|
||||
|
||||
%SPHINXBUILD% >NUL 2>NUL
|
||||
if errorlevel 9009 (
|
||||
echo.
|
||||
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
||||
echo.installed, then set the SPHINXBUILD environment variable to point
|
||||
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
||||
echo.may add the Sphinx directory to PATH.
|
||||
echo.
|
||||
echo.If you don't have Sphinx installed, grab it from
|
||||
echo.https://www.sphinx-doc.org/
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
if "%1" == "" goto help
|
||||
|
||||
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
goto end
|
||||
|
||||
:help
|
||||
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
|
||||
:end
|
||||
popd
|
||||
40
docs/api/requirements.txt
Normal file
40
docs/api/requirements.txt
Normal file
@@ -0,0 +1,40 @@
|
||||
# Sphinx dependencies
|
||||
sphinx>=8.1.3
|
||||
sphinx-rtd-theme
|
||||
sphinx-markdown-builder
|
||||
sphinx-autodoc-typehints
|
||||
toml
|
||||
|
||||
# Install all extras individually to ensure they're properly resolved
|
||||
pipecat-ai[anthropic]
|
||||
pipecat-ai[assemblyai]
|
||||
pipecat-ai[aws]
|
||||
pipecat-ai[azure]
|
||||
pipecat-ai[canonical]
|
||||
pipecat-ai[cartesia]
|
||||
pipecat-ai[daily]
|
||||
pipecat-ai[deepgram]
|
||||
pipecat-ai[elevenlabs]
|
||||
pipecat-ai[fal]
|
||||
pipecat-ai[fireworks]
|
||||
pipecat-ai[gladia]
|
||||
pipecat-ai[google]
|
||||
pipecat-ai[grok]
|
||||
pipecat-ai[groq]
|
||||
# pipecat-ai[krisp] # Mocked instead
|
||||
pipecat-ai[langchain]
|
||||
pipecat-ai[livekit]
|
||||
pipecat-ai[lmnt]
|
||||
pipecat-ai[local]
|
||||
pipecat-ai[moondream]
|
||||
pipecat-ai[nim]
|
||||
pipecat-ai[noisereduce]
|
||||
pipecat-ai[openai]
|
||||
# pipecat-ai[openpipe]
|
||||
# pipecat-ai[playht] # Mocked due to grpcio conflict with riva
|
||||
pipecat-ai[riva]
|
||||
pipecat-ai[silero]
|
||||
pipecat-ai[simli]
|
||||
pipecat-ai[soundfile]
|
||||
pipecat-ai[websocket]
|
||||
pipecat-ai[whisper]
|
||||
38
docs/api/rtd-test.sh
Executable file
38
docs/api/rtd-test.sh
Executable file
@@ -0,0 +1,38 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
# Configuration
|
||||
DOCS_DIR=$(pwd)
|
||||
PROJECT_ROOT=$(cd ../../ && pwd)
|
||||
TEST_DIR="/tmp/rtd-test-$(date +%Y%m%d_%H%M%S)"
|
||||
|
||||
echo "Creating test directory: $TEST_DIR"
|
||||
mkdir -p "$TEST_DIR"
|
||||
cd "$TEST_DIR"
|
||||
|
||||
# Create virtual environment
|
||||
python -m venv venv
|
||||
source venv/bin/activate
|
||||
|
||||
echo "Installing build dependencies..."
|
||||
pip install --upgrade pip wheel setuptools
|
||||
|
||||
echo "Installing documentation dependencies..."
|
||||
pip install -r "$DOCS_DIR/requirements.txt"
|
||||
|
||||
echo "Building documentation..."
|
||||
cd "$DOCS_DIR"
|
||||
sphinx-build -b html . "_build/html"
|
||||
|
||||
echo "Build complete. Check _build/html directory for output."
|
||||
|
||||
# Print summary
|
||||
echo -e "\n=== Build Summary ==="
|
||||
echo "Documentation: $DOCS_DIR/_build/html"
|
||||
echo "Test environment: $TEST_DIR"
|
||||
echo -e "\nTo view the documentation:"
|
||||
echo "open $DOCS_DIR/_build/html/index.html"
|
||||
|
||||
# Print installed packages for verification
|
||||
echo -e "\n=== Installed Packages ==="
|
||||
pip freeze | grep -E "sphinx|pipecat"
|
||||
110
docs/frame.md
Normal file
110
docs/frame.md
Normal file
@@ -0,0 +1,110 @@
|
||||
# Understanding Different Frame Types in the Pipecat System
|
||||
|
||||
In the Pipecat system, frames are used to represent different types of data and control signals that flow through the pipeline. Understanding these frame types is crucial for working with the system effectively. This tutorial will cover the main categories of frames and their specific uses.
|
||||
|
||||
## 1. Base Frame Classes
|
||||
|
||||
### Frame
|
||||
The `Frame` class is the base class for all frames. It includes:
|
||||
- `id`: A unique identifier
|
||||
- `name`: A descriptive name
|
||||
- `pts`: Presentation timestamp (optional)
|
||||
|
||||
### DataFrame
|
||||
`DataFrame` is a subclass of `Frame` and serves as a base for most data-carrying frames.
|
||||
|
||||
## 2. Audio Frames
|
||||
|
||||
### AudioRawFrame
|
||||
Represents a chunk of audio with properties:
|
||||
- `audio`: Raw audio data
|
||||
- `sample_rate`: Audio sample rate
|
||||
- `num_channels`: Number of audio channels
|
||||
|
||||
Subclasses include:
|
||||
- `InputAudioRawFrame`: For audio from input sources
|
||||
- `OutputAudioRawFrame`: For audio to be played by output devices
|
||||
- `TTSAudioRawFrame`: For audio generated by Text-to-Speech services
|
||||
|
||||
## 3. Image Frames
|
||||
|
||||
### ImageRawFrame
|
||||
Represents an image with properties:
|
||||
- `image`: Raw image data
|
||||
- `size`: Image dimensions
|
||||
- `format`: Image format (e.g., JPEG, PNG)
|
||||
|
||||
Subclasses include:
|
||||
- `InputImageRawFrame`: For images from input sources
|
||||
- `OutputImageRawFrame`: For images to be displayed
|
||||
- `UserImageRawFrame`: For images associated with a specific user
|
||||
- `VisionImageRawFrame`: For images with associated text for description
|
||||
- `URLImageRawFrame`: For images with an associated URL
|
||||
|
||||
### SpriteFrame
|
||||
Represents an animated sprite, containing a list of `ImageRawFrame` objects.
|
||||
|
||||
## 4. Text and Transcription Frames
|
||||
|
||||
### TextFrame
|
||||
Represents a chunk of text, used for various purposes in the pipeline.
|
||||
|
||||
### TranscriptionFrame
|
||||
A specialized `TextFrame` for speech transcriptions, including:
|
||||
- `user_id`: ID of the speaking user
|
||||
- `timestamp`: When the transcription was generated
|
||||
- `language`: Detected language of the speech
|
||||
|
||||
### InterimTranscriptionFrame
|
||||
Similar to `TranscriptionFrame`, but for interim (not final) transcriptions.
|
||||
|
||||
## 5. LLM (Language Model) Frames
|
||||
|
||||
### LLMMessagesFrame
|
||||
Contains a list of messages for an LLM service to process.
|
||||
|
||||
### LLMMessagesAppendFrame and LLMMessagesUpdateFrame
|
||||
Used to modify the current context of LLM messages.
|
||||
|
||||
### LLMSetToolsFrame
|
||||
Specifies tools (functions) available for the LLM to use.
|
||||
|
||||
### LLMEnablePromptCachingFrame
|
||||
Controls prompt caching in certain LLMs.
|
||||
|
||||
## 6. System and Control Frames
|
||||
|
||||
### SystemFrame
|
||||
Base class for system-level frames.
|
||||
|
||||
Important system frames include:
|
||||
- `StartFrame`: Initiates a pipeline
|
||||
- `CancelFrame`: Stops a pipeline immediately
|
||||
- `ErrorFrame`: Notifies of errors (with `FatalErrorFrame` for unrecoverable errors)
|
||||
- `EndTaskFrame` and `CancelTaskFrame`: Control pipeline tasks
|
||||
- `StartInterruptionFrame` and `StopInterruptionFrame`: Indicate user speech for interruptions
|
||||
|
||||
### ControlFrame
|
||||
Base class for control-flow frames.
|
||||
|
||||
Notable control frames:
|
||||
- `EndFrame`: Signals the end of a pipeline
|
||||
- `LLMFullResponseStartFrame` and `LLMFullResponseEndFrame`: Bracket LLM responses
|
||||
- `UserStartedSpeakingFrame` and `UserStoppedSpeakingFrame`: Indicate user speech activity
|
||||
- `BotStartedSpeakingFrame` and `BotStoppedSpeakingFrame`: Indicate bot speech activity
|
||||
- `TTSStartedFrame` and `TTSStoppedFrame`: Bracket Text-to-Speech responses
|
||||
|
||||
## 7. Special Purpose Frames
|
||||
|
||||
### MetricsFrame
|
||||
Contains performance metrics data.
|
||||
|
||||
### FunctionCallInProgressFrame and FunctionCallResultFrame
|
||||
Used for handling LLM function (tool) calls.
|
||||
|
||||
### ServiceUpdateSettingsFrame
|
||||
Base class for updating service settings, with specific subclasses for LLM, TTS, and STT services.
|
||||
|
||||
## Conclusion
|
||||
|
||||
Understanding these frame types is essential for working with the Pipecat system. Each frame type serves a specific purpose in the pipeline, whether it's carrying data (like audio or images), controlling the flow of the pipeline, or managing system-level operations. By using the appropriate frame types, you can effectively process and transmit various kinds of information through your pipeline.
|
||||
@@ -46,5 +46,41 @@ PLAY_HT_API_KEY=...
|
||||
# OpenAI
|
||||
OPENAI_API_KEY=...
|
||||
|
||||
#OpenPipe
|
||||
# OpenPipe
|
||||
OPENPIPE_API_KEY=...
|
||||
|
||||
# Tavus
|
||||
TAVUS_API_KEY=...
|
||||
TAVUS_REPLICA_ID=...
|
||||
TAVUS_PERSONA_ID=...
|
||||
|
||||
# Simli
|
||||
SIMLI_API_KEY=...
|
||||
SIMLI_FACE_ID=...
|
||||
|
||||
# Krisp
|
||||
KRISP_MODEL_PATH=...
|
||||
|
||||
# DeepSeek
|
||||
DEEPSEEK_API_KEY=...
|
||||
|
||||
# Groq
|
||||
GROQ_API_KEY=...
|
||||
|
||||
# Grok
|
||||
GROK_API_KEY=...
|
||||
|
||||
# Together.ai
|
||||
TOGETHER_API_KEY=...
|
||||
|
||||
# Cerebras
|
||||
CEREBRAS_API_KEY=...
|
||||
|
||||
# Fish Audio
|
||||
FISH_API_KEY=...
|
||||
|
||||
# Assembly AI
|
||||
ASSEMBLYAI_API_KEY=...
|
||||
|
||||
# OpenRouter
|
||||
OPENROUTER_API_KEY=...
|
||||
|
||||
@@ -42,6 +42,7 @@ Next, follow the steps in the README for each demo.
|
||||
| [Dialin Chatbot](dialin-chatbot) | A chatbot that connects to an incoming phone call from Daily or Twilio. | Deepgram, ElevenLabs, OpenAI, Daily, Twilio |
|
||||
| [Twilio Chatbot](twilio-chatbot) | A chatbot that connects to an incoming phone call from Twilio. | Deepgram, ElevenLabs, OpenAI, Daily, Twilio |
|
||||
| [studypal](studypal) | A chatbot to have a conversation about any article on the web | |
|
||||
| [WebSocket Chatbot Server](websocket-server) | A real-time websocket server that handles audio streaming and bot interactions with speech-to-text and text-to-speech capabilities | `python-websockets`, `openai`, `deepgram`, `silero-tts`, `numpy` |
|
||||
|
||||
> [!IMPORTANT]
|
||||
> These example projects use Daily as a WebRTC transport and can be joined using their hosted Prebuilt UI.
|
||||
|
||||
@@ -1,16 +1,10 @@
|
||||
FROM python:3.10-bullseye
|
||||
|
||||
RUN mkdir /app
|
||||
RUN mkdir /app/assets
|
||||
RUN mkdir /app/utils
|
||||
COPY *.py /app/
|
||||
COPY requirements.txt /app/
|
||||
copy assets/* /app/assets/
|
||||
copy utils/* /app/utils/
|
||||
|
||||
WORKDIR /app
|
||||
RUN pip3 install -r requirements.txt
|
||||
|
||||
EXPOSE 7860
|
||||
|
||||
CMD ["python3", "server.py"]
|
||||
CMD ["python3", "server.py"]
|
||||
|
||||
@@ -1,12 +1,41 @@
|
||||
# Simple Chatbot
|
||||
# Chatbot with canonical-metrics
|
||||
|
||||
<img src="image.png" width="420px">
|
||||
This project implements a chatbot using a pipeline architecture that integrates audio processing, transcription, and a language model for conversational interactions. The chatbot operates within a Daily call, utilizing various services for text-to-speech and language model responses.
|
||||
|
||||
This app connects you to a chatbot powered by GPT-4, complete with animations generated by Stable Video Diffusion.
|
||||
## Features
|
||||
|
||||
See a video of it in action: https://x.com/kwindla/status/1778628911817183509
|
||||
- **Audio Input and Output**: Captures microphone input and plays back audio responses.
|
||||
- **Voice Activity Detection**: Utilizes Silero VAD to manage audio input intelligently.
|
||||
- **Text-to-Speech**: Integrates ElevenLabs TTS service to convert text responses into audio.
|
||||
- **Language Model Interaction**: Uses OpenAI's GPT-4 model to generate responses based on user input.
|
||||
- **Transcription Services**: Captures and transcribes participant speech for analytics.
|
||||
- **Metrics Collection**: Sends audio data for analysis via Canonical Metrics Service.
|
||||
|
||||
## Requirements
|
||||
|
||||
- Python 3.10+
|
||||
- `python-dotenv`
|
||||
- Additional libraries from the `pipecat` package.
|
||||
|
||||
## Setup
|
||||
|
||||
1. Clone the repository.
|
||||
2. Install the required packages.
|
||||
3. Set up environment variables for API keys:
|
||||
- `OPENAI_API_KEY`
|
||||
- `ELEVENLABS_API_KEY`
|
||||
- `CANONICAL_API_KEY`
|
||||
- `CANONICAL_API_URL`
|
||||
4. Run the script.
|
||||
|
||||
## Usage
|
||||
|
||||
The chatbot introduces itself and engages in conversations, providing brief and creative responses. Designed for flexibility, it can support multiple languages with appropriate configuration.
|
||||
|
||||
## Events
|
||||
|
||||
- Participants joining or leaving the call are handled dynamically, adjusting the chatbot's behavior accordingly.
|
||||
|
||||
And a quick video walkthrough of the code: https://www.loom.com/share/13df1967161f4d24ade054e7f8753416
|
||||
|
||||
ℹ️ The first time, things might take extra time to get started since the VAD (Voice Activity Detection) model needs to be downloaded.
|
||||
|
||||
@@ -27,7 +56,7 @@ cp env.example .env # and add your credentials
|
||||
python server.py
|
||||
```
|
||||
|
||||
Then, visit `http://localhost:7860/start` in your browser to start a chatbot session.
|
||||
Then, visit `http://localhost:7860/` in your browser to start a chatbot session.
|
||||
|
||||
## Build and test the Docker image
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
@@ -14,20 +14,17 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import EndFrame, LLMMessagesFrame
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
|
||||
from pipecat.services.canonical import CanonicalMetricsService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -92,34 +89,34 @@ async def main():
|
||||
},
|
||||
]
|
||||
|
||||
user_response = LLMUserResponseAggregator()
|
||||
assistant_response = LLMAssistantResponseAggregator()
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
"""
|
||||
CanonicalMetrics uses AudioBufferProcessor under the hood to buffer the audio. On
|
||||
call completion, CanonicalMetrics will send the audio buffer to Canonical for
|
||||
analysis. Visit https://voice.canonical.chat to learn more.
|
||||
"""
|
||||
audio_buffer_processor = AudioBufferProcessor()
|
||||
audio_buffer_processor = AudioBufferProcessor(num_channels=2)
|
||||
canonical = CanonicalMetricsService(
|
||||
audio_buffer_processor=audio_buffer_processor,
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("CANONICAL_API_KEY"),
|
||||
api_url=os.getenv("CANONICAL_API_URL"),
|
||||
call_id=str(uuid.uuid4()),
|
||||
assistant="pipecat-chatbot",
|
||||
assistant_speaks_first=True,
|
||||
context=context,
|
||||
)
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # microphone
|
||||
user_response,
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
audio_buffer_processor, # captures audio into a buffer
|
||||
canonical, # uploads audio buffer to Canonical AI for metrics
|
||||
assistant_response,
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -127,8 +124,8 @@ async def main():
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
|
||||
@@ -2,4 +2,5 @@ DAILY_SAMPLE_ROOM_URL=https://yourdomain.daily.co/yourroom # (for joining the bo
|
||||
DAILY_API_KEY=7df...
|
||||
OPENAI_API_KEY=sk-PL...
|
||||
ELEVENLABS_API_KEY=aeb...
|
||||
CANONICAL_API_KEY=can...
|
||||
CANONICAL_API_KEY=can...
|
||||
CANONICAL_API_URL=
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
@@ -59,7 +59,7 @@ app.add_middleware(
|
||||
)
|
||||
|
||||
|
||||
@app.get("/start")
|
||||
@app.get("/")
|
||||
async def start_agent(request: Request):
|
||||
print(f"!!! Creating room")
|
||||
room = await daily_helpers["rest"].create_room(DailyRoomParams())
|
||||
|
||||
@@ -27,7 +27,7 @@ cp env.example .env # and add your credentials
|
||||
python server.py
|
||||
```
|
||||
|
||||
Then, visit `http://localhost:7860/start` in your browser to start a chatbot session.
|
||||
Then, visit `http://localhost:7860/` in your browser to start a chatbot session.
|
||||
|
||||
## Build and test the Docker image
|
||||
|
||||
|
||||
@@ -1,31 +1,32 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import datetime
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
import wave
|
||||
|
||||
import aiofiles
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import EndFrame, LLMMessagesFrame
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -33,6 +34,22 @@ logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def save_audio(audio: bytes, sample_rate: int, num_channels: int):
    """Write raw audio bytes to a timestamped WAV file in the working directory.

    Args:
        audio: Raw sample data to store; written with a sample width of 2 bytes.
        sample_rate: Frame rate recorded in the WAV header.
        num_channels: Channel count recorded in the WAV header.

    Prints the destination filename on success, or a notice when ``audio``
    is empty (in which case no file is created).
    """
    # Guard clause: nothing to write for an empty buffer.
    if len(audio) == 0:
        print("No audio data to save")
        return

    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"conversation_recording{timestamp}.wav"

    # Build the WAV container in memory first, then hand the finished bytes
    # to aiofiles so the disk write does not block the event loop.
    with io.BytesIO() as wav_bytes:
        with wave.open(wav_bytes, "wb") as writer:
            writer.setsampwidth(2)
            writer.setnchannels(num_channels)
            writer.setframerate(sample_rate)
            writer.writeframes(audio)
        async with aiofiles.open(filename, "wb") as out_file:
            await out_file.write(wav_bytes.getvalue())
    print(f"Merged audio saved to {filename}")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
@@ -90,39 +107,40 @@ async def main():
|
||||
},
|
||||
]
|
||||
|
||||
user_response = LLMUserResponseAggregator()
|
||||
assistant_response = LLMAssistantResponseAggregator()
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
# Save audio every 10 seconds.
|
||||
audiobuffer = AudioBufferProcessor(buffer_size=480000)
|
||||
|
||||
audiobuffer = AudioBufferProcessor()
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # microphone
|
||||
user_response,
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
audiobuffer, # used to buffer the audio in the pipeline
|
||||
assistant_response,
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@audiobuffer.event_handler("on_audio_data")
|
||||
async def on_audio_data(buffer, audio, sample_rate, num_channels):
|
||||
await save_audio(audio, sample_rate, num_channels)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
print(f"Participant left: {participant}")
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
@transport.event_handler("on_call_state_updated")
|
||||
async def on_call_state_updated(transport, state):
|
||||
if state == "left":
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
aiofiles
|
||||
python-dotenv
|
||||
fastapi[all]
|
||||
uvicorn
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
@@ -59,7 +59,7 @@ app.add_middleware(
|
||||
)
|
||||
|
||||
|
||||
@app.get("/start")
|
||||
@app.get("/")
|
||||
async def start_agent(request: Request):
|
||||
print(f"!!! Creating room")
|
||||
room = await daily_helpers["rest"].create_room(DailyRoomParams())
|
||||
|
||||
@@ -34,6 +34,6 @@ Note: you can do this manually via the fly.io dashboard under the "secrets" sub-
|
||||
|
||||
Send a post request to your running fly.io instance:
|
||||
|
||||
`curl --location --request POST 'https://YOUR_FLY_APP_NAME/start_bot'`
|
||||
`curl --location --request POST 'https://YOUR_FLY_APP_NAME/'`
|
||||
|
||||
This request will wait until the machine enters the `starting` state before returning a room URL and token to join.
|
||||
|
||||
@@ -1,24 +1,20 @@
|
||||
import argparse
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.frames.frames import LLMMessagesFrame, EndFrame
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -60,17 +56,17 @@ async def main(room_url: str, token: str):
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
tma_in,
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -78,8 +74,8 @@ async def main(room_url: str, token: str):
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
|
||||
@@ -1,29 +1,27 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import subprocess
|
||||
import os
|
||||
|
||||
import subprocess
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from fastapi import FastAPI, HTTPException, Request
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import (
|
||||
DailyRESTHelper,
|
||||
DailyRoomObject,
|
||||
DailyRoomProperties,
|
||||
DailyRoomParams,
|
||||
DailyRoomProperties,
|
||||
)
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
@@ -124,7 +122,7 @@ async def spawn_fly_machine(room_url: str, token: str):
|
||||
print(f"Machine joined room: {room_url}")
|
||||
|
||||
|
||||
@app.post("/start_bot")
|
||||
@app.post("/")
|
||||
async def start_bot(request: Request) -> JSONResponse:
|
||||
try:
|
||||
data = await request.json()
|
||||
|
||||
91
examples/deployment/modal-example/.gitignore
vendored
Normal file
91
examples/deployment/modal-example/.gitignore
vendored
Normal file
@@ -0,0 +1,91 @@
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
build/
|
||||
dist/
|
||||
*.egg-info/
|
||||
*.egg
|
||||
.installed.cfg
|
||||
.eggs/
|
||||
downloads/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
MANIFEST
|
||||
|
||||
# Virtual Environments
|
||||
venv/
|
||||
env/
|
||||
.env
|
||||
.venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# IDE
|
||||
.idea/
|
||||
.vscode/
|
||||
.spyderproject
|
||||
.spyproject
|
||||
.ropeproject
|
||||
|
||||
# Testing and Coverage
|
||||
.coverage
|
||||
.coverage.*
|
||||
htmlcov/
|
||||
.pytest_cache/
|
||||
.tox/
|
||||
.nox/
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
.hypothesis/
|
||||
cover/
|
||||
|
||||
# Logs and Databases
|
||||
*.log
|
||||
*.db
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
pip-log.txt
|
||||
|
||||
# System Files
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
desktop.ini
|
||||
*.swp
|
||||
*.swo
|
||||
*.bak
|
||||
*.tmp
|
||||
*~
|
||||
|
||||
# Build and Documentation
|
||||
docs/_build/
|
||||
.pybuilder/
|
||||
target/
|
||||
instance/
|
||||
.webassets-cache
|
||||
.pdm.toml
|
||||
.pdm-python
|
||||
.pdm-build/
|
||||
__pypackages__/
|
||||
|
||||
# Other
|
||||
*.mo
|
||||
*.pot
|
||||
*.sage.py
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
.pyre/
|
||||
.pytype/
|
||||
cython_debug/
|
||||
.ipynb_checkpoints
|
||||
37
examples/deployment/modal-example/README.md
Normal file
37
examples/deployment/modal-example/README.md
Normal file
@@ -0,0 +1,37 @@
|
||||
# Deploying Pipecat to Modal.com
|
||||
|
||||
Barebones deployment example for [modal.com](https://www.modal.com)
|
||||
|
||||
1. Install dependencies
|
||||
|
||||
```bash
|
||||
python -m venv venv
|
||||
source venv/bin/activate # or OS equivalent
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
2. Setup .env
|
||||
|
||||
```bash
|
||||
cp env.example .env
|
||||
```
|
||||
|
||||
Alternatively, you can configure your Modal app to use [secrets](https://modal.com/docs/guide/secrets)
|
||||
|
||||
3. Test the app locally
|
||||
|
||||
```bash
|
||||
modal serve app.py
|
||||
```
|
||||
|
||||
4. Deploy to production
|
||||
|
||||
```bash
|
||||
modal deploy app.py
|
||||
```
|
||||
|
||||
## Configuration options
|
||||
|
||||
This app sets some sensible defaults for reducing cold starts, such as `keep_warm=1`, which will keep at least 1 warm instance ready for your bot function.
|
||||
|
||||
It has been configured to only allow a concurrency of 1 (`max_inputs=1`) as each user will require their own running function.
|
||||
74
examples/deployment/modal-example/app.py
Normal file
74
examples/deployment/modal-example/app.py
Normal file
@@ -0,0 +1,74 @@
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
import modal
|
||||
from bot import _voice_bot_process
|
||||
from fastapi import HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from loguru import logger
|
||||
|
||||
# Session length limit in seconds; passed to get_token() when the bot's
# Daily token is created in start().
MAX_SESSION_TIME = 15 * 60  # 15 minutes

# Modal application that the functions in this file are registered on.
app = modal.App("pipecat-modal")


# Container image shared by the functions below: Debian slim with Python 3.12
# plus the packages listed in requirements.txt.
image = modal.Image.debian_slim(python_version="3.12").pip_install_from_requirements(
    "requirements.txt"
)
|
||||
|
||||
|
||||
@app.function(
    image=image,
    cpu=1.0,
    secrets=[modal.Secret.from_dotenv()],
    keep_warm=1,
    enable_memory_snapshot=True,
    max_inputs=1,  # Do not reuse instances across requests
    retries=0,
)
def launch_bot_process(room_url: str, token: str):
    """Modal function that runs one voice bot for one Daily room.

    Args:
        room_url: URL of the Daily room the bot should join.
        token: Daily meeting token the bot uses to join the room.
    """
    # Delegate to the bot module, which runs the bot to completion.
    _voice_bot_process(room_url=room_url, token=token)
|
||||
|
||||
|
||||
@app.function(
    image=image,
    secrets=[modal.Secret.from_dotenv()],
)
@modal.web_endpoint(method="POST")
async def start():
    """HTTP POST endpoint: create a Daily room, spawn a bot into it, return join info.

    Returns:
        JSONResponse with the keys ``"room_url"`` and ``"token"``.

    Raises:
        HTTPException: status 500 when the room or the token cannot be created.
    """
    # Imported inside the function, as in the original, rather than at module level.
    from pipecat.transports.services.helpers.daily_rest import (
        DailyRESTHelper,
        DailyRoomParams,
    )

    logger.info("Request received")

    async with aiohttp.ClientSession() as session:
        daily_rest_helper = DailyRESTHelper(
            daily_api_key=os.getenv("DAILY_API_KEY", ""),
            daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
            aiohttp_session=session,
        )

        # Create new Daily room
        room = await daily_rest_helper.create_room(DailyRoomParams())
        if not room.url:
            raise HTTPException(
                status_code=500,
                detail="Unable to create room",
            )
        logger.info(f"Created room: {room.url}")

        # Create bot token for room
        token = await daily_rest_helper.get_token(room.url, MAX_SESSION_TIME)
        if not token:
            raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")

        # NOTE(review): this writes the meeting token to the logs; consider
        # redacting it outside of demos.
        logger.info(f"Bot token created: {token}")

        # Spawn a new bot process
        launch_bot_process.spawn(room_url=room.url, token=token)

        # Return room URL to the user to join
        # Note: in production, you would want to return a token to the user
        # The key must be the literal string "token"; using the bare name
        # `token` would make the token's value the JSON key.
        return JSONResponse(content={"room_url": room.url, "token": token})
|
||||
90
examples/deployment/modal-example/bot.py
Normal file
90
examples/deployment/modal-example/bot.py
Normal file
@@ -0,0 +1,90 @@
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
# Load variables from a .env file; override=True lets them replace values
# that are already set in the process environment.
load_dotenv(override=True)

# Replace loguru's handler with id 0 by a stderr sink at DEBUG level.
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token: str):
    """Build and run the voice-bot pipeline for one Daily room.

    Wires a Daily transport, an OpenAI LLM and a Cartesia TTS service into a
    pipeline, registers the participant event handlers, and awaits the
    pipeline runner.

    Args:
        room_url: URL of the Daily room to join.
        token: Daily meeting token used to join the room.
    """
    # pipecat is imported inside the function rather than at module top level.
    # NOTE(review): presumably to keep importing this module cheap for app.py — confirm.
    from pipecat.audio.vad.silero import SileroVADAnalyzer
    from pipecat.frames.frames import EndFrame
    from pipecat.pipeline.pipeline import Pipeline
    from pipecat.pipeline.runner import PipelineRunner
    from pipecat.pipeline.task import PipelineParams, PipelineTask
    from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
    from pipecat.services.cartesia import CartesiaTTSService
    from pipecat.services.openai import OpenAILLMService
    from pipecat.transports.services.daily import DailyParams, DailyTransport

    # Transport joins the room under the name "bot", with audio output,
    # transcription and Silero voice-activity detection enabled.
    transport = DailyTransport(
        room_url,
        token,
        "bot",
        DailyParams(
            audio_out_enabled=True,
            transcription_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
        ),
    )

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY", ""), voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22"
    )

    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

    # Initial conversation: a single system prompt.
    messages = [
        {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
        },
    ]

    context = OpenAILLMContext(messages)
    context_aggregator = llm.create_context_aggregator(context)

    # Processors run in list order: transport input -> user context ->
    # LLM -> TTS -> transport output -> assistant context.
    pipeline = Pipeline(
        [
            transport.input(),
            context_aggregator.user(),
            llm,
            tts,
            transport.output(),
            context_aggregator.assistant(),
        ]
    )

    task = PipelineTask(
        pipeline,
        PipelineParams(
            allow_interruptions=True,
            enable_metrics=True,
            enable_usage_metrics=True,
            report_only_initial_ttfb=True,
        ),
    )

    @transport.event_handler("on_first_participant_joined")
    async def on_first_participant_joined(transport, participant):
        # Start transcribing the participant, then queue the context frame
        # so the bot introduces itself.
        await transport.capture_participant_transcription(participant["id"])
        # NOTE(review): appends to the list passed to OpenAILLMContext above;
        # assumes the context holds a reference to that same list — confirm.
        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
        await task.queue_frames([context_aggregator.user().get_context_frame()])

    @transport.event_handler("on_participant_left")
    async def on_participant_left(transport, participant, reason):
        # Queue an EndFrame on the task when a participant leaves.
        await task.queue_frame(EndFrame())

    runner = PipelineRunner()

    await runner.run(task)
|
||||
|
||||
|
||||
def _voice_bot_process(room_url: str, token: str):
    """Synchronous entry point: run the async bot ``main`` to completion.

    Args:
        room_url: URL of the Daily room the bot should join.
        token: Daily meeting token used to join the room.
    """
    bot_coroutine = main(room_url, token)
    asyncio.run(bot_coroutine)
|
||||
3
examples/deployment/modal-example/env.example
Normal file
3
examples/deployment/modal-example/env.example
Normal file
@@ -0,0 +1,3 @@
|
||||
DAILY_API_KEY=
|
||||
OPENAI_API_KEY=
|
||||
CARTESIA_API_KEY=
|
||||
5
examples/deployment/modal-example/requirements.txt
Normal file
5
examples/deployment/modal-example/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
python-dotenv==1.0.1
|
||||
modal==0.71.3
|
||||
pipecat-ai[daily,silero,cartesia,openai]==0.0.52
|
||||
fastapi==0.115.6
|
||||
aiohttp==3.11.11
|
||||
@@ -1,23 +1,20 @@
|
||||
import argparse
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.frames.frames import LLMMessagesFrame, EndFrame
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport, DailyDialinSettings
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from pipecat.transports.services.daily import DailyDialinSettings, DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -65,17 +62,17 @@ async def main(room_url: str, token: str, callId: str, callDomain: str):
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
tma_in,
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -83,8 +80,8 @@ async def main(room_url: str, token: str, callId: str, callDomain: str):
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
|
||||
@@ -7,14 +7,14 @@ provisioning a room and starting a Pipecat bot in response.
|
||||
Refer to README for more information.
|
||||
"""
|
||||
|
||||
import aiohttp
|
||||
import os
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from fastapi import FastAPI, HTTPException, Request
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse, PlainTextResponse
|
||||
from twilio.twiml.voice_response import VoiceResponse
|
||||
@@ -22,13 +22,11 @@ from twilio.twiml.voice_response import VoiceResponse
|
||||
from pipecat.transports.services.helpers.daily_rest import (
|
||||
DailyRESTHelper,
|
||||
DailyRoomObject,
|
||||
DailyRoomParams,
|
||||
DailyRoomProperties,
|
||||
DailyRoomSipParams,
|
||||
DailyRoomParams,
|
||||
)
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
@@ -108,11 +106,9 @@ async def _create_daily_room(room_url, callId, callDomain=None, vendor="daily"):
|
||||
# Spawn a new agent, and join the user session
|
||||
# Note: this is mostly for demonstration purposes (refer to 'deployment' in docs)
|
||||
if vendor == "daily":
|
||||
bot_proc = f"python3 - m bot_daily - u {room.url} - t {token} - i {
|
||||
callId} - d {callDomain}"
|
||||
bot_proc = f"python3 -m bot_daily -u {room.url} -t {token} -i {callId} -d {callDomain}"
|
||||
else:
|
||||
bot_proc = f"python3 - m bot_twilio - u {room.url} - t {
|
||||
token} - i {callId} - s {room.config.sip_endpoint}"
|
||||
bot_proc = f"python3 -m bot_twilio -u {room.url} -t {token} -i {callId} -s {room.config.sip_endpoint}"
|
||||
|
||||
try:
|
||||
subprocess.Popen(
|
||||
|
||||
@@ -1,26 +1,21 @@
|
||||
import argparse
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from twilio.rest import Client
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.frames.frames import LLMMessagesFrame, EndFrame
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from twilio.rest import Client
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -69,17 +64,17 @@ async def main(room_url: str, token: str, callId: str, sipUri: str):
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
tma_in,
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -87,8 +82,8 @@ async def main(room_url: str, token: str, callId: str, sipUri: str):
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
|
||||
@@ -1,26 +1,24 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import EndFrame, TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from pipecat.frames.frames import EndFrame, TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -36,7 +34,7 @@ async def main():
|
||||
room_url, None, "Say One Thing", DailyParams(audio_out_enabled=True)
|
||||
)
|
||||
|
||||
tts = CartesiaHttpTTSService(
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
@@ -47,10 +45,12 @@ async def main():
|
||||
|
||||
# Register an event handler so we can play the audio when the
|
||||
# participant joins.
|
||||
@transport.event_handler("on_participant_joined")
|
||||
async def on_new_participant_joined(transport, participant):
|
||||
participant_name = participant["info"]["userName"] or ""
|
||||
await task.queue_frames([TextFrame(f"Hello there, {participant_name}!"), EndFrame()])
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
participant_name = participant.get("info", {}).get("userName", "")
|
||||
await task.queue_frames(
|
||||
[TTSSpeakFrame(f"Hello there, {participant_name}!"), EndFrame()]
|
||||
)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
@@ -1,15 +1,17 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import TextFrame
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import EndFrame, TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
@@ -17,10 +19,6 @@ from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.audio import LocalAudioTransport
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -28,25 +26,24 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = LocalAudioTransport(TransportParams(audio_out_enabled=True))
|
||||
transport = LocalAudioTransport(TransportParams(audio_out_enabled=True))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
pipeline = Pipeline([tts, transport.output()])
|
||||
pipeline = Pipeline([tts, transport.output()])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
async def say_something():
|
||||
await asyncio.sleep(1)
|
||||
await task.queue_frame(TextFrame("Hello there!"))
|
||||
async def say_something():
|
||||
await asyncio.sleep(1)
|
||||
await task.queue_frames([TTSSpeakFrame("Hello there, how is it going!"), EndFrame()])
|
||||
|
||||
runner = PipelineRunner()
|
||||
runner = PipelineRunner()
|
||||
|
||||
await asyncio.gather(runner.run(task), say_something())
|
||||
await asyncio.gather(runner.run(task), say_something())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -5,7 +5,7 @@ import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from livekit import api # pip install livekit-api
|
||||
from livekit import api
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import TextFrame
|
||||
@@ -78,7 +78,7 @@ async def main():
|
||||
url=url,
|
||||
token=token,
|
||||
room_name=room_name,
|
||||
params=LiveKitParams(audio_out_enabled=True, audio_out_sample_rate=16000),
|
||||
params=LiveKitParams(audio_out_enabled=True),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
|
||||
54
examples/foundational/01c-fastpitch.py
Normal file
54
examples/foundational/01c-fastpitch.py
Normal file
@@ -0,0 +1,54 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import EndFrame, TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.riva import FastPitchTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Say One Thing", DailyParams(audio_out_enabled=True)
|
||||
)
|
||||
|
||||
tts = FastPitchTTSService(api_key=os.getenv("NVIDIA_API_KEY"))
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(Pipeline([tts, transport.output()]))
|
||||
|
||||
# Register an event handler so we can play the audio when the
|
||||
# participant joins.
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
participant_name = participant.get("info", {}).get("userName", "")
|
||||
await task.queue_frames([TTSSpeakFrame(f"Aloha, {participant_name}!"), EndFrame()])
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -1,28 +1,26 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import EndFrame, LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -37,7 +35,7 @@ async def main():
|
||||
room_url, None, "Say One Thing From an LLM", DailyParams(audio_out_enabled=True)
|
||||
)
|
||||
|
||||
tts = CartesiaHttpTTSService(
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
@@ -1,27 +1,25 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import TextFrame
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import EndFrame, TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -51,11 +49,11 @@ async def main():
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
# Note that we do not put an EndFrame() item in the pipeline for this demo.
|
||||
# This means that the bot will stay in the channel until it times out.
|
||||
# An EndFrame() in the pipeline would cause the transport to shut
|
||||
# down.
|
||||
await task.queue_frames([TextFrame("a cat in the style of picasso")])
|
||||
await task.queue_frame(TextFrame("a cat in the style of picasso"))
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
@@ -1,16 +1,18 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
import tkinter as tk
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -19,10 +21,6 @@ from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.tk import TkLocalTransport
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
@@ -8,27 +8,24 @@
|
||||
# This example broken on latest pipecat and needs updating.
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.pipeline.merge_pipeline import SequentialMergePipeline
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import EndPipeFrame, LLMMessagesFrame, TextFrame
|
||||
from pipecat.pipeline.merge_pipeline import SequentialMergePipeline
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.azure import AzureLLMService, AzureTTSService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.transport_services import TransportServiceOutput
|
||||
from pipecat.services.transports.daily_transport import DailyTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
|
||||
@@ -1,18 +1,21 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
AppFrame,
|
||||
DataFrame,
|
||||
Frame,
|
||||
LLMFullResponseStartFrame,
|
||||
LLMMessagesFrame,
|
||||
@@ -22,19 +25,13 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -42,7 +39,7 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
@dataclass
|
||||
class MonthFrame(AppFrame):
|
||||
class MonthFrame(DataFrame):
|
||||
month: str
|
||||
|
||||
def __str__(self):
|
||||
|
||||
@@ -1,23 +1,25 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import tkinter as tk
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
LLMMessagesFrame,
|
||||
OutputAudioRawFrame,
|
||||
TextFrame,
|
||||
TTSAudioRawFrame,
|
||||
URLImageRawFrame,
|
||||
LLMMessagesFrame,
|
||||
TextFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -26,15 +28,11 @@ from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.tk import TkLocalTransport, TkOutputTransport
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
|
||||
@@ -1,39 +1,34 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import Frame, LLMMessagesFrame, MetricsFrame
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame, Frame, MetricsFrame
|
||||
from pipecat.metrics.metrics import (
|
||||
TTFBMetricsData,
|
||||
ProcessingMetricsData,
|
||||
LLMUsageMetricsData,
|
||||
ProcessingMetricsData,
|
||||
TTFBMetricsData,
|
||||
TTSUsageMetricsData,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -52,9 +47,7 @@ class MetricsLogger(FrameProcessor):
|
||||
elif isinstance(d, LLMUsageMetricsData):
|
||||
tokens = d.value
|
||||
print(
|
||||
f"!!! MetricsFrame: {frame}, tokens: {
|
||||
tokens.prompt_tokens}, characters: {
|
||||
tokens.completion_tokens}"
|
||||
f"!!! MetricsFrame: {frame}, tokens: {tokens.prompt_tokens}, characters: {tokens.completion_tokens}"
|
||||
)
|
||||
elif isinstance(d, TTSUsageMetricsData):
|
||||
print(f"!!! MetricsFrame: {frame}, characters: {d.value}")
|
||||
@@ -92,29 +85,37 @@ async def main():
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
tma_in,
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
tts,
|
||||
ml,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(enable_metrics=True, enable_usage_metrics=True),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
|
||||
@@ -1,36 +1,29 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from PIL import Image
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import Frame, OutputImageRawFrame, SystemFrame, TextFrame
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame, Frame, OutputImageRawFrame, SystemFrame, TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -105,8 +98,8 @@ async def main():
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
image_sync_aggregator = ImageSyncAggregator(
|
||||
os.path.join(os.path.dirname(__file__), "assets", "speaking.png"),
|
||||
@@ -117,11 +110,11 @@ async def main():
|
||||
[
|
||||
transport.input(),
|
||||
image_sync_aggregator,
|
||||
tma_in,
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -129,10 +122,14 @@ async def main():
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
participant_name = participant["info"]["userName"] or ""
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
participant_name = participant.get("info", {}).get("userName", "")
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([TextFrame(f"Hi there {participant_name}!")])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
105
examples/foundational/07-interruptible-vad.py
Normal file
105
examples/foundational/07-interruptible-vad.py
Normal file
@@ -0,0 +1,105 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.audio.vad.silero import SileroVAD
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
),
|
||||
)
|
||||
|
||||
vad = SileroVAD()
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
vad,
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -1,32 +1,27 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -64,17 +59,17 @@ async def main():
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
@@ -90,10 +85,14 @@ async def main():
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
@@ -13,7 +13,8 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
@@ -21,7 +22,6 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -78,13 +78,25 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
@@ -9,8 +9,17 @@ import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
|
||||
from langchain_community.chat_message_histories import ChatMessageHistory
|
||||
from langchain_core.chat_history import BaseChatMessageHistory
|
||||
from langchain_core.runnables.history import RunnableWithMessageHistory
|
||||
from langchain_openai import ChatOpenAI
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame, LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
@@ -21,19 +30,6 @@ from pipecat.processors.aggregators.llm_response import (
|
||||
from pipecat.processors.frameworks.langchain import LangchainProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
|
||||
from langchain_community.chat_message_histories import ChatMessageHistory
|
||||
from langchain_core.chat_history import BaseChatMessageHistory
|
||||
from langchain_core.runnables.history import RunnableWithMessageHistory
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from runner import configure
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -105,11 +101,19 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
lc.set_participant_id(participant["id"])
|
||||
# Kick off the conversation.
|
||||
# the `LLMMessagesFrame` will be picked up by the LangchainProcessor using
|
||||
@@ -118,6 +122,10 @@ async def main():
|
||||
messages = [({"content": "Please briefly introduce yourself to the user."})]
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
117
examples/foundational/07c-interruptible-deepgram-vad.py
Normal file
117
examples/foundational/07c-interruptible-deepgram-vad.py
Normal file
@@ -0,0 +1,117 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from deepgram import LiveOptions
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
BotInterruptionFrame,
|
||||
EndFrame,
|
||||
StopInterruptionFrame,
|
||||
UserStartedSpeakingFrame,
|
||||
UserStoppedSpeakingFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
)
|
||||
|
||||
stt = DeepgramSTTService(
|
||||
api_key=os.getenv("DEEPGRAM_API_KEY"),
|
||||
live_options=LiveOptions(vad_events=True, utterance_end_ms="1000"),
|
||||
)
|
||||
|
||||
tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY"), voice="aura-helios-en")
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@stt.event_handler("on_speech_started")
|
||||
async def on_speech_started(stt, *args, **kwargs):
|
||||
await task.queue_frames([BotInterruptionFrame(), UserStartedSpeakingFrame()])
|
||||
|
||||
@stt.event_handler("on_utterance_end")
|
||||
async def on_utterance_end(stt, *args, **kwargs):
|
||||
await task.queue_frames([StopInterruptionFrame(), UserStoppedSpeakingFrame()])
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
@@ -13,18 +13,15 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -34,11 +31,11 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
None,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
@@ -61,29 +58,40 @@ async def main():
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
@@ -13,18 +13,15 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -62,17 +59,17 @@ async def main():
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
@@ -88,10 +85,14 @@ async def main():
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
|
||||
105
examples/foundational/07e-interruptible-playht-http.py
Normal file
105
examples/foundational/07e-interruptible-playht-http.py
Normal file
@@ -0,0 +1,105 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.playht import PlayHTHttpTTSService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = PlayHTHttpTTSService(
|
||||
user_id=os.getenv("PLAYHT_USER_ID"),
|
||||
api_key=os.getenv("PLAYHT_API_KEY"),
|
||||
voice_url="s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
@@ -13,18 +13,16 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.playht import PlayHTTTSService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -42,7 +40,6 @@ async def main():
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=16000,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
@@ -52,7 +49,8 @@ async def main():
|
||||
tts = PlayHTTTSService(
|
||||
user_id=os.getenv("PLAYHT_USER_ID"),
|
||||
api_key=os.getenv("PLAYHT_API_KEY"),
|
||||
voice_url="s3://voice-cloning-zero-shot/801a663f-efd0-4254-98d0-5c175514c3e8/jennifer/manifest.json",
|
||||
voice_url="s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
|
||||
params=PlayHTTTSService.InputParams(language=Language.EN),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
@@ -64,28 +62,40 @@ async def main():
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
|
||||
@@ -1,32 +1,26 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.azure import AzureLLMService, AzureSTTService, AzureTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -44,7 +38,6 @@ async def main():
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=16000,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True,
|
||||
@@ -74,29 +67,41 @@ async def main():
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
@@ -13,17 +13,14 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.openai import OpenAILLMService, OpenAITTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -59,28 +56,40 @@ async def main():
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
|
||||
@@ -1,33 +1,28 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openpipe import OpenPipeLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
import time
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -70,28 +65,41 @@ async def main():
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
|
||||
@@ -1,33 +1,27 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.xtts import XTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -54,7 +48,6 @@ async def main():
|
||||
tts = XTTSService(
|
||||
aiohttp_session=session,
|
||||
voice_id="Claribel Dervla",
|
||||
language="en",
|
||||
base_url="http://localhost:8000",
|
||||
)
|
||||
|
||||
@@ -67,28 +60,40 @@ async def main():
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
|
||||
@@ -1,33 +1,28 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.gladia import GladiaSTTService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -69,29 +64,42 @@ async def main():
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
# Register an event handler to exit the application when the user leaves.
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
|
||||
@@ -1,32 +1,27 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.lmnt import LmntTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -62,28 +57,40 @@ async def main():
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
@@ -13,7 +13,8 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
@@ -21,7 +22,6 @@ from pipecat.services.ai_services import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.together import TogetherLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -52,7 +52,7 @@ async def main():
|
||||
|
||||
llm = TogetherLLMService(
|
||||
api_key=os.getenv("TOGETHER_API_KEY"),
|
||||
model=os.getenv("TOGETHER_MODEL"),
|
||||
model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
|
||||
params=TogetherLLMService.InputParams(
|
||||
temperature=1.0,
|
||||
top_p=0.9,
|
||||
@@ -90,15 +90,22 @@ async def main():
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True, enable_metrics=True, enable_usage_metrics=True
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
|
||||
110
examples/foundational/07m-interruptible-polly.py
Normal file
110
examples/foundational/07m-interruptible-polly.py
Normal file
@@ -0,0 +1,110 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.aws import PollyTTSService
|
||||
from pipecat.services.deepgram import DeepgramSTTService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
    """Run the interruptible voice bot that speaks through Amazon Polly.

    Builds a Daily transport, a Deepgram STT service, an OpenAI LLM and a
    Polly TTS service, wires them into a single pipeline and runs that
    pipeline until the runner returns.
    """
    async with aiohttp.ClientSession() as http_session:
        # configure() yields a (room_url, token) pair; the token is not used
        # here and the transport is created with None in its place.
        room_url, _ = await configure(http_session)

        daily_params = DailyParams(
            audio_out_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
            vad_audio_passthrough=True,
        )
        transport = DailyTransport(room_url, None, "Respond bot", daily_params)

        stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

        # All AWS credentials and the region are read from the environment.
        polly_params = PollyTTSService.InputParams(
            engine="neural", language="en-GB", rate="1.05"
        )
        tts = PollyTTSService(
            api_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
            aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
            region=os.getenv("AWS_REGION"),
            voice_id="Amy",
            params=polly_params,
        )

        llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

        system_prompt = {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
        }
        # This list is handed to the context and later appended to by the
        # join handler below.
        messages = [system_prompt]

        llm_context = OpenAILLMContext(messages)
        context_aggregator = llm.create_context_aggregator(llm_context)

        # Processors in the order frames travel through them.
        processors = [
            transport.input(),  # Transport user input
            stt,  # STT
            context_aggregator.user(),  # User responses
            llm,  # LLM
            tts,  # TTS
            transport.output(),  # Transport bot output
            context_aggregator.assistant(),  # Assistant spoken responses
        ]
        pipeline = Pipeline(processors)

        task_params = PipelineParams(
            allow_interruptions=True,
            enable_metrics=True,
            enable_usage_metrics=True,
            report_only_initial_ttfb=True,
        )
        pipeline_task = PipelineTask(pipeline, task_params)

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            await transport.capture_participant_transcription(participant["id"])
            # Kick off the conversation: add an introduction request to the
            # shared message list, then queue the user-side context frame.
            intro_request = {
                "role": "system",
                "content": "Please introduce yourself to the user.",
            }
            messages.append(intro_request)
            kickoff_frame = context_aggregator.user().get_context_frame()
            await pipeline_task.queue_frames([kickoff_frame])

        @transport.event_handler("on_participant_left")
        async def on_participant_left(transport, participant, reason):
            # Queue an EndFrame once a participant leaves.
            await pipeline_task.queue_frame(EndFrame())

        pipeline_runner = PipelineRunner()
        await pipeline_runner.run(pipeline_task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
@@ -13,19 +13,17 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.deepgram import DeepgramSTTService
|
||||
from pipecat.services.google import GoogleTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -35,11 +33,11 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
None,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
@@ -53,8 +51,8 @@ async def main():
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = GoogleTTSService(
|
||||
voice_id="en-US-Neural2-J",
|
||||
params=GoogleTTSService.InputParams(language="en-US", rate="1.05"),
|
||||
voice_id="en-US-Journey-F",
|
||||
params=GoogleTTSService.InputParams(language=Language.EN_US),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
@@ -66,29 +64,41 @@ async def main():
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
|
||||
109
examples/foundational/07o-interruptible-assemblyai.py
Normal file
109
examples/foundational/07o-interruptible-assemblyai.py
Normal file
@@ -0,0 +1,109 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.assemblyai import AssemblyAISTTService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
    """Run the interruptible voice bot that transcribes with AssemblyAI.

    Builds a Daily transport, an AssemblyAI STT service, an OpenAI LLM and a
    Cartesia TTS service, wires them into a single pipeline and runs that
    pipeline until the runner returns.
    """
    async with aiohttp.ClientSession() as http_session:
        room_url, token = await configure(http_session)

        daily_params = DailyParams(
            audio_out_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
            vad_audio_passthrough=True,
        )
        transport = DailyTransport(room_url, token, "Respond bot", daily_params)

        # Service API keys are read from the environment.
        stt = AssemblyAISTTService(
            api_key=os.getenv("ASSEMBLYAI_API_KEY"),
        )

        tts = CartesiaTTSService(
            api_key=os.getenv("CARTESIA_API_KEY"),
            voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22",  # British Lady
        )

        llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

        system_prompt = {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
        }
        # This list is handed to the context and later appended to by the
        # join handler below.
        messages = [system_prompt]

        llm_context = OpenAILLMContext(messages)
        context_aggregator = llm.create_context_aggregator(llm_context)

        # Processors in the order frames travel through them.
        processors = [
            transport.input(),  # Transport user input
            stt,  # STT
            context_aggregator.user(),  # User responses
            llm,  # LLM
            tts,  # TTS
            transport.output(),  # Transport bot output
            context_aggregator.assistant(),  # Assistant spoken responses
        ]
        pipeline = Pipeline(processors)

        task_params = PipelineParams(
            allow_interruptions=True,
            enable_metrics=True,
            enable_usage_metrics=True,
            report_only_initial_ttfb=True,
        )
        pipeline_task = PipelineTask(pipeline, task_params)

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            await transport.capture_participant_transcription(participant["id"])
            # Kick off the conversation: add an introduction request to the
            # shared message list, then queue the user-side context frame.
            intro_request = {
                "role": "system",
                "content": "Please introduce yourself to the user.",
            }
            messages.append(intro_request)
            kickoff_frame = context_aggregator.user().get_context_frame()
            await pipeline_task.queue_frames([kickoff_frame])

        @transport.event_handler("on_participant_left")
        async def on_participant_left(transport, participant, reason):
            # Queue an EndFrame once a participant leaves.
            await pipeline_task.queue_frame(EndFrame())

        pipeline_runner = PipelineRunner()
        await pipeline_runner.run(pipeline_task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
@@ -13,16 +13,13 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.audio.filters.krisp_filter import KrispFilter
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.aws import AWSTTSService
|
||||
from pipecat.services.deepgram import DeepgramSTTService
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -43,22 +40,16 @@ async def main():
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=16000,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True,
|
||||
audio_in_filter=KrispFilter(),
|
||||
),
|
||||
)
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = AWSTTSService(
|
||||
api_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
|
||||
aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
|
||||
region=os.getenv("AWS_REGION"),
|
||||
voice_id="Amy",
|
||||
params=AWSTTSService.InputParams(engine="neural", language="en-GB", rate="1.05"),
|
||||
)
|
||||
tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY"), voice="aura-helios-en")
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
@@ -69,29 +60,40 @@ async def main():
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
104
examples/foundational/07q-interruptible-rime.py
Normal file
104
examples/foundational/07q-interruptible-rime.py
Normal file
@@ -0,0 +1,104 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.rime import RimeHttpTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
    """Run the interruptible Rime TTS example bot in a Daily room.

    Builds a Daily transport (with Silero VAD and transcription enabled), a
    Rime HTTP TTS service and an OpenAI LLM, wires them into a pipeline with
    a shared LLM context, and runs the pipeline task until it completes.
    """
    async with aiohttp.ClientSession() as session:
        room_url, token = await configure(session)

        daily_params = DailyParams(
            audio_out_enabled=True,
            transcription_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
        )
        transport = DailyTransport(room_url, token, "Respond bot", daily_params)

        tts = RimeHttpTTSService(
            api_key=os.getenv("RIME_API_KEY", ""),
            voice_id="rex",
            params=RimeHttpTTSService.InputParams(reduce_latency=True),
        )

        llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

        # The context is created from this list; the join handler below
        # appends to the same list object before requesting a context frame.
        system_prompt = {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
        }
        messages = [system_prompt]

        context = OpenAILLMContext(messages)
        context_aggregator = llm.create_context_aggregator(context)

        processors = [
            transport.input(),  # Transport user input
            context_aggregator.user(),  # User responses
            llm,  # LLM
            tts,  # TTS
            transport.output(),  # Transport bot output
            context_aggregator.assistant(),  # Assistant spoken responses
        ]
        pipeline = Pipeline(processors)

        task_params = PipelineParams(
            allow_interruptions=True,
            enable_metrics=True,
            enable_usage_metrics=True,
            report_only_initial_ttfb=True,
        )
        task = PipelineTask(pipeline, task_params)

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            await transport.capture_participant_transcription(participant["id"])
            # Kick off the conversation.
            intro_request = {
                "role": "system",
                "content": "Please introduce yourself to the user.",
            }
            messages.append(intro_request)
            context_frame = context_aggregator.user().get_context_frame()
            await task.queue_frames([context_frame])

        @transport.event_handler("on_participant_left")
        async def on_participant_left(transport, participant, reason):
            # End the pipeline when a participant leaves.
            await task.queue_frame(EndFrame())

        runner = PipelineRunner()

        await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
96
examples/foundational/07r-interruptible-riva-nim.py
Normal file
96
examples/foundational/07r-interruptible-riva-nim.py
Normal file
@@ -0,0 +1,96 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.nim import NimLLMService
|
||||
from pipecat.services.riva import FastPitchTTSService, ParakeetSTTService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
    """Run the interruptible NVIDIA Riva / NIM example bot in a Daily room.

    Audio from the Daily transport is passed through VAD to a Parakeet STT
    service, then to a NIM-hosted LLM and a FastPitch TTS service. The token
    returned by ``configure`` is discarded and ``None`` is passed to the
    transport instead.
    """
    async with aiohttp.ClientSession() as session:
        room_url, _ = await configure(session)

        daily_params = DailyParams(
            audio_out_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
            vad_audio_passthrough=True,
        )
        transport = DailyTransport(room_url, None, "Respond bot", daily_params)

        stt = ParakeetSTTService(api_key=os.getenv("NVIDIA_API_KEY"))

        llm = NimLLMService(
            api_key=os.getenv("NVIDIA_API_KEY"),
            model="meta/llama-3.1-405b-instruct",
        )

        tts = FastPitchTTSService(api_key=os.getenv("NVIDIA_API_KEY"))

        # The context is created from this list; the join handler below
        # appends to the same list object before requesting a context frame.
        system_prompt = {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
        }
        messages = [system_prompt]

        context = OpenAILLMContext(messages)
        context_aggregator = llm.create_context_aggregator(context)

        processors = [
            transport.input(),  # Transport user input
            stt,  # STT
            context_aggregator.user(),  # User responses
            llm,  # LLM
            tts,  # TTS
            transport.output(),  # Transport bot output
            context_aggregator.assistant(),  # Assistant spoken responses
        ]
        pipeline = Pipeline(processors)

        task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            # Kick off the conversation.
            intro_request = {
                "role": "system",
                "content": "Please introduce yourself to the user.",
            }
            messages.append(intro_request)
            context_frame = context_aggregator.user().get_context_frame()
            await task.queue_frames([context_frame])

        @transport.event_handler("on_participant_left")
        async def on_participant_left(transport, participant, reason):
            # End the pipeline when a participant leaves.
            await task.queue_frame(EndFrame())

        runner = PipelineRunner()

        await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
282
examples/foundational/07s-interruptible-google-audio-in.py
Normal file
282
examples/foundational/07s-interruptible-google-audio-in.py
Normal file
@@ -0,0 +1,282 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
|
||||
import aiohttp
|
||||
import google.ai.generativelanguage as glm
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import (
|
||||
EndFrame,
|
||||
Frame,
|
||||
InputAudioRawFrame,
|
||||
LLMFullResponseEndFrame,
|
||||
LLMFullResponseStartFrame,
|
||||
StartInterruptionFrame,
|
||||
TextFrame,
|
||||
TranscriptionFrame,
|
||||
UserStartedSpeakingFrame,
|
||||
UserStoppedSpeakingFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.frame_processor import FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.google import GoogleLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
marker = "|----|"
|
||||
system_message = f"""
|
||||
You are a helpful LLM in a WebRTC call. Your goals are to be helpful and brief in your responses.
|
||||
|
||||
You are expert at transcribing audio to text. You will receive a mixture of audio and text input. When
|
||||
asked to transcribe what the user said, output an exact, word-for-word transcription.
|
||||
|
||||
Your output will be converted to audio so don't include special characters in your answers.
|
||||
|
||||
Each time you answer, you should respond in three parts.
|
||||
|
||||
1. Transcribe exactly what the user said.
|
||||
2. Output the separator field '{marker}'.
|
||||
3. Respond to the user's input in a helpful, creative way using only simple text and punctuation.
|
||||
|
||||
Example:
|
||||
|
||||
User: How many ounces are in a pound?
|
||||
|
||||
You: How many ounces are in a pound?
|
||||
{marker}
|
||||
There are 16 ounces in a pound.
|
||||
"""
|
||||
|
||||
|
||||
@dataclass
class MagicDemoTranscriptionFrame(Frame):
    """Frame carrying the transcript text pulled out of an LLM response.

    Pushed by ``TranscriptExtractor`` and consumed by
    ``TanscriptionContextFixup`` in this example.
    """

    # Transcript text (the part of the LLM output that preceded the marker).
    text: str
|
||||
|
||||
|
||||
class UserAudioCollector(FrameProcessor):
    """Buffer raw user audio and attach it to the LLM context after each utterance.

    While the user is not speaking, a short rolling pre-roll buffer of about
    ``_start_secs`` seconds is kept so the audio that triggered the VAD is not
    lost. While the user is speaking, every audio frame is appended. When the
    user stops speaking, the buffered frames are added to the context as an
    audio message and a context frame is pushed through the user context
    aggregator.

    Args:
        context: LLM context; must provide ``add_audio_frames_message``.
        user_context_aggregator: aggregator used to build and push the context frame.
    """

    # NOTE(review): assumes ``frame.audio`` holds 16-bit PCM (2 bytes per
    # sample per channel) — confirm against the transport's audio format.
    _BYTES_PER_SAMPLE = 2

    def __init__(self, context, user_context_aggregator):
        super().__init__()
        self._context = context
        self._user_context_aggregator = user_context_aggregator
        self._audio_frames = []
        self._start_secs = 0.2  # this should match VAD start_secs (hardcoding for now)
        self._user_speaking = False

    def _frame_duration_secs(self, frame):
        """Return the playback duration of one raw audio frame in seconds."""
        bytes_per_second = self._BYTES_PER_SAMPLE * frame.num_channels * frame.sample_rate
        return len(frame.audio) / bytes_per_second

    async def process_frame(self, frame, direction):
        """Track speaking state, buffer audio, and forward every non-transcription frame."""
        await super().process_frame(frame, direction)

        if isinstance(frame, TranscriptionFrame):
            # We could gracefully handle both audio input and text/transcription input ...
            # but let's leave that as an exercise to the reader. :-)
            return
        if isinstance(frame, UserStartedSpeakingFrame):
            self._user_speaking = True
        elif isinstance(frame, UserStoppedSpeakingFrame):
            self._user_speaking = False
            self._context.add_audio_frames_message(audio_frames=self._audio_frames)
            await self._user_context_aggregator.push_frame(
                self._user_context_aggregator.get_context_frame()
            )
        elif isinstance(frame, InputAudioRawFrame):
            if self._user_speaking:
                self._audio_frames.append(frame)
            else:
                # Append the audio frame to our buffer. Treat the buffer as a ring buffer,
                # dropping the oldest frames as necessary. Assume all audio frames have the
                # same duration.
                self._audio_frames.append(frame)
                # Duration is bytes / (bytes-per-sample * channels * sample-rate). The
                # previous formula divided by 16 and multiplied by the channel count, which
                # under-reported the duration and let the pre-roll grow far past _start_secs.
                frame_duration = self._frame_duration_secs(frame)
                buffer_duration = frame_duration * len(self._audio_frames)
                while buffer_duration > self._start_secs:
                    self._audio_frames.pop(0)
                    buffer_duration -= frame_duration

        await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
class TranscriptExtractor(FrameProcessor):
    """Split each LLM response into a transcript part and a spoken part.

    Text that arrives before the module-level ``marker`` is accumulated as the
    transcript and not pushed onward. Text after the marker is pushed onward as
    normal text frames. When the response ends, the accumulated transcript is
    emitted as a ``MagicDemoTranscriptionFrame`` ahead of the end frame.

    Args:
        context: LLM context; stored but not otherwise used by this class.
    """

    def __init__(self, context):
        super().__init__()
        self._context = context
        self._accumulator = ""
        self._processing_llm_response = False
        self._accumulating_transcript = False

    def reset(self):
        """Clear the accumulated transcript and both state flags."""
        self._accumulator = ""
        self._processing_llm_response = False
        self._accumulating_transcript = False

    async def process_frame(self, frame, direction):
        """Withhold pre-marker text, forward post-marker text, emit the transcript at the end."""
        await super().process_frame(frame, direction)

        if isinstance(frame, LLMFullResponseStartFrame):
            self._processing_llm_response = True
            self._accumulating_transcript = True
        elif isinstance(frame, TextFrame) and self._processing_llm_response:
            if self._accumulating_transcript:
                # Search the accumulated text together with the new chunk so that a
                # marker split across two streamed text frames is still detected.
                # Earlier chunks are known not to contain a complete marker, so a
                # match here can only end inside the new chunk.
                combined = self._accumulator + frame.text
                split_index = combined.find(marker)
                if split_index < 0:
                    self._accumulator = combined
                    # do not push this frame
                    return
                self._accumulating_transcript = False
                self._accumulator = combined[:split_index]
                frame.text = combined[split_index + len(marker) :]
            await self.push_frame(frame)
            return
        elif isinstance(frame, LLMFullResponseEndFrame):
            await self.push_frame(MagicDemoTranscriptionFrame(text=self._accumulator.strip()))
            self.reset()

        await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
class TanscriptionContextFixup(FrameProcessor):
    """Rewrite the stored context so audio turns are kept as text transcripts.

    When a ``MagicDemoTranscriptionFrame`` arrives, its text is remembered.
    On the next ``LLMFullResponseEndFrame`` or ``StartInterruptionFrame`` the
    second-to-last context message is replaced with a text-only user message
    if it is a user message ending in ``audio/wav`` inline data, and the
    transcript is appended (after the marker) to the last message if it is a
    model message with text. All frames are forwarded unchanged.

    Args:
        context: LLM context whose ``messages`` list is edited in place.
    """

    def __init__(self, context):
        super().__init__()
        self._context = context
        # NOTE(review): placeholder initial value; it is used as a real transcript
        # if a fixup is triggered before any MagicDemoTranscriptionFrame arrives.
        self._transcript = "THIS IS A TRANSCRIPT"

    def swap_user_audio(self):
        """Replace the second-to-last message with the transcript if it is user audio."""
        if not self._transcript:
            return
        messages = self._context.messages
        # Guard against short contexts (e.g. an interruption before the first
        # model reply) instead of raising IndexError.
        if len(messages) < 2:
            return
        message = messages[-2]
        if not message.parts:
            return
        last_part = message.parts[-1]
        if (
            message.role == "user"
            and last_part.inline_data
            and last_part.inline_data.mime_type == "audio/wav"
        ):
            messages[-2] = glm.Content(role="user", parts=[glm.Part(text=self._transcript)])

    def add_transcript_back_to_inference_output(self):
        """Append the marker and transcript to the last message if it is model text."""
        if not self._transcript:
            return
        messages = self._context.messages
        if not messages:
            return
        message = messages[-1]
        if not message.parts:
            return
        last_part = message.parts[-1]
        if message.role == "model" and last_part.text:
            messages[-1].parts[-1].text += f"\n\n{marker}\n{self._transcript}\n"

    async def process_frame(self, frame, direction):
        """Record transcripts, apply the context fixups at response end or interruption."""
        await super().process_frame(frame, direction)

        if isinstance(frame, MagicDemoTranscriptionFrame):
            self._transcript = frame.text
        elif isinstance(frame, (LLMFullResponseEndFrame, StartInterruptionFrame)):
            self.swap_user_audio()
            self.add_transcript_back_to_inference_output()
            # Clear so the same transcript is not applied twice.
            self._transcript = ""

        await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main():
    """Run the Google audio-in example bot in a Daily room.

    Raw user audio is collected by ``UserAudioCollector`` and added to the LLM
    context; the Google LLM is prompted to transcribe and then answer. The
    transcript is stripped from the spoken output by ``TranscriptExtractor``
    and written back into the context by ``TanscriptionContextFixup``.

    NOTE(review): transport transcription is left disabled here, yet the join
    handler still calls ``capture_participant_transcription`` — confirm
    whether that call is needed.
    """
    async with aiohttp.ClientSession() as session:
        room_url, token = await configure(session)

        # Transcription is intentionally not enabled on the transport:
        # the LLM receives audio input directly.
        daily_params = DailyParams(
            audio_out_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
            vad_audio_passthrough=True,
        )
        transport = DailyTransport(room_url, token, "Respond bot", daily_params)

        tts = CartesiaTTSService(
            api_key=os.getenv("CARTESIA_API_KEY"),
            voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22",  # British Lady
        )

        llm = GoogleLLMService(
            model="gemini-1.5-flash-latest",
            api_key=os.getenv("GOOGLE_API_KEY"),
        )

        system_prompt = {"role": "system", "content": system_message}
        opening_turn = {"role": "user", "content": "Start by saying hello."}
        messages = [system_prompt, opening_turn]

        context = OpenAILLMContext(messages)
        context_aggregator = llm.create_context_aggregator(context)
        audio_collector = UserAudioCollector(context, context_aggregator.user())
        pull_transcript_out_of_llm_output = TranscriptExtractor(context)
        fixup_context_messages = TanscriptionContextFixup(context)

        processors = [
            transport.input(),  # Transport user input
            audio_collector,
            context_aggregator.user(),  # User responses
            llm,  # LLM
            pull_transcript_out_of_llm_output,
            tts,  # TTS
            transport.output(),  # Transport bot output
            context_aggregator.assistant(),  # Assistant spoken responses
            fixup_context_messages,
        ]
        pipeline = Pipeline(processors)

        task_params = PipelineParams(
            allow_interruptions=True,
            enable_metrics=True,
            enable_usage_metrics=True,
        )
        task = PipelineTask(pipeline, task_params)

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            await transport.capture_participant_transcription(participant["id"])
            # Kick off the conversation.
            context_frame = context_aggregator.user().get_context_frame()
            await task.queue_frames([context_frame])

        @transport.event_handler("on_participant_left")
        async def on_participant_left(transport, participant, reason):
            # End the pipeline when a participant leaves.
            await task.queue_frame(EndFrame())

        runner = PipelineRunner()

        await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
103
examples/foundational/07t-interruptible-fish.py
Normal file
103
examples/foundational/07t-interruptible-fish.py
Normal file
@@ -0,0 +1,103 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.fish import FishAudioTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
    """Run the interruptible Fish Audio TTS example bot in a Daily room.

    Builds a Daily transport (with Silero VAD and transcription enabled), a
    Fish Audio TTS service and an OpenAI LLM, wires them into a pipeline with
    a shared LLM context, and runs the pipeline task until it completes.
    """
    async with aiohttp.ClientSession() as session:
        room_url, token = await configure(session)

        daily_params = DailyParams(
            audio_out_enabled=True,
            transcription_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
        )
        transport = DailyTransport(room_url, token, "Respond bot", daily_params)

        tts = FishAudioTTSService(
            api_key=os.getenv("FISH_API_KEY"),
            model="4ce7e917cedd4bc2bb2e6ff3a46acaa1",  # Barack Obama
        )

        llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

        # The context is created from this list; the join handler below
        # appends to the same list object before requesting a context frame.
        system_prompt = {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
        }
        messages = [system_prompt]

        context = OpenAILLMContext(messages)
        context_aggregator = llm.create_context_aggregator(context)

        processors = [
            transport.input(),  # Transport user input
            context_aggregator.user(),  # User responses
            llm,  # LLM
            tts,  # TTS
            transport.output(),  # Transport bot output
            context_aggregator.assistant(),  # Assistant spoken responses
        ]
        pipeline = Pipeline(processors)

        task_params = PipelineParams(
            allow_interruptions=True,
            enable_metrics=True,
            enable_usage_metrics=True,
            report_only_initial_ttfb=True,
        )
        task = PipelineTask(pipeline, task_params)

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            await transport.capture_participant_transcription(participant["id"])
            # Kick off the conversation.
            intro_request = {
                "role": "system",
                "content": "Please introduce yourself to the user.",
            }
            messages.append(intro_request)
            context_frame = context_aggregator.user().get_context_frame()
            await task.queue_frames([context_frame])

        @transport.event_handler("on_participant_left")
        async def on_participant_left(transport, participant, reason):
            # End the pipeline when a participant leaves.
            await task.queue_frame(EndFrame())

        runner = PipelineRunner()

        await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -1,20 +1,19 @@
|
||||
from typing import Tuple
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from pipecat.processors.aggregators import SentenceAggregator
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from typing import Tuple
|
||||
|
||||
from pipecat.transports.services.daily import DailyTransport
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import AudioFrame, EndFrame, ImageFrame, LLMMessagesFrame, TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.processors.aggregators import SentenceAggregator
|
||||
from pipecat.services.azure import AzureLLMService, AzureTTSService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.frames.frames import AudioFrame, EndFrame, ImageFrame, LLMMessagesFrame, TextFrame
|
||||
|
||||
from runner import configure
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from pipecat.transports.services.daily import DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
InputAudioRawFrame,
|
||||
@@ -19,13 +23,7 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.transports.services.daily import DailyTransport, DailyParams
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -63,6 +61,7 @@ async def main():
|
||||
"Test",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_in_sample_rate=24000,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_is_live=True,
|
||||
@@ -73,7 +72,7 @@ async def main():
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_video(participant["id"])
|
||||
await transport.capture_participant_video(participant["id"])
|
||||
|
||||
pipeline = Pipeline([transport.input(), MirrorProcessor(), transport.output()])
|
||||
|
||||
|
||||
@@ -1,15 +1,18 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
import tkinter as tk
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
InputAudioRawFrame,
|
||||
@@ -25,12 +28,6 @@ from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.tk import TkLocalTransport
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -65,7 +62,7 @@ async def main():
|
||||
tk_root.title("Local Mirror")
|
||||
|
||||
daily_transport = DailyTransport(
|
||||
room_url, token, "Test", DailyParams(audio_in_enabled=True)
|
||||
room_url, token, "Test", DailyParams(audio_in_enabled=True, audio_in_sample_rate=24000)
|
||||
)
|
||||
|
||||
tk_transport = TkLocalTransport(
|
||||
@@ -81,7 +78,7 @@ async def main():
|
||||
|
||||
@daily_transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_video(participant["id"])
|
||||
await transport.capture_participant_video(participant["id"])
|
||||
|
||||
pipeline = Pipeline([daily_transport.input(), MirrorProcessor(), tk_transport.output()])
|
||||
|
||||
|
||||
@@ -1,32 +1,27 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.processors.filters.wake_check_filter import WakeCheckFilter
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.filters.wake_check_filter import WakeCheckFilter
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -65,18 +60,19 @@ async def main():
|
||||
]
|
||||
|
||||
hey_robot_filter = WakeCheckFilter(["hey robot", "hey, robot"])
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
hey_robot_filter, # Filter out speech not directed at the robot
|
||||
tma_in, # User responses
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
@@ -84,7 +80,7 @@ async def main():
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
await tts.say("Hi! If you want to talk to me, just say 'Hey Robot'.")
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
@@ -1,40 +1,37 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import wave
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
LLMFullResponseEndFrame,
|
||||
LLMMessagesFrame,
|
||||
OutputAudioRawFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMUserResponseAggregator,
|
||||
LLMAssistantResponseAggregator,
|
||||
from pipecat.processors.aggregators.openai_llm_context import (
|
||||
OpenAILLMContext,
|
||||
OpenAILLMContextFrame,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.logger import FrameLogger
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -75,7 +72,7 @@ class InboundSoundEffectWrapper(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, LLMMessagesFrame):
|
||||
if isinstance(frame, OpenAILLMContextFrame):
|
||||
await self.push_frame(sounds["ding2.wav"])
|
||||
# In case anything else downstream needs it
|
||||
await self.push_frame(frame, direction)
|
||||
@@ -101,7 +98,7 @@ async def main():
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
tts = CartesiaHttpTTSService(
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
@@ -113,8 +110,8 @@ async def main():
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
out_sound = OutboundSoundEffectWrapper()
|
||||
in_sound = InboundSoundEffectWrapper()
|
||||
fl = FrameLogger("LLM Out")
|
||||
@@ -123,7 +120,7 @@ async def main():
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
tma_in,
|
||||
context_aggregator.user(),
|
||||
in_sound,
|
||||
fl2,
|
||||
llm,
|
||||
@@ -131,13 +128,13 @@ async def main():
|
||||
tts,
|
||||
out_sound,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
await tts.say("Hi, I'm listening!")
|
||||
await transport.send_audio(sounds["ding1.wav"])
|
||||
|
||||
|
||||
@@ -1,14 +1,19 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -19,13 +24,6 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.moondream import MoondreamService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -84,8 +82,8 @@ async def main():
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await tts.say("Hi there! Feel free to ask me what I see.")
|
||||
transport.capture_participant_video(participant["id"], framerate=0)
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await transport.capture_participant_video(participant["id"], framerate=0)
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
image_requester.set_participant_id(participant["id"])
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -1,14 +1,19 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -19,13 +24,6 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.google import GoogleLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -86,8 +84,8 @@ async def main():
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await tts.say("Hi there! Feel free to ask me what I see.")
|
||||
transport.capture_participant_video(participant["id"], framerate=0)
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await transport.capture_participant_video(participant["id"], framerate=0)
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
image_requester.set_participant_id(participant["id"])
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -1,14 +1,19 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -19,13 +24,6 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -83,8 +81,8 @@ async def main():
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await tts.say("Hi there! Feel free to ask me what I see.")
|
||||
transport.capture_participant_video(participant["id"], framerate=0)
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await transport.capture_participant_video(participant["id"], framerate=0)
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
image_requester.set_participant_id(participant["id"])
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -1,14 +1,19 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -16,16 +21,9 @@ from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -78,16 +76,13 @@ async def main():
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
params=CartesiaTTSService.InputParams(
|
||||
sample_rate=16000,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await tts.say("Hi there! Feel free to ask me what I see.")
|
||||
transport.capture_participant_video(participant["id"], framerate=0)
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await transport.capture_participant_video(participant["id"], framerate=0)
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
image_requester.set_participant_id(participant["id"])
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import Frame, TranscriptionFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -16,12 +20,6 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.whisper import WhisperSTTService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
@@ -7,6 +7,9 @@
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import Frame, TranscriptionFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -16,10 +19,6 @@ from pipecat.services.whisper import WhisperSTTService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.audio import LocalAudioTransport
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
|
||||
@@ -1,28 +1,26 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import Frame, TranscriptionFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.deepgram import DeepgramSTTService, LiveOptions, Language
|
||||
from pipecat.services.deepgram import DeepgramSTTService, Language, LiveOptions
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
|
||||
63
examples/foundational/13c-gladia-transcription.py
Normal file
63
examples/foundational/13c-gladia-transcription.py
Normal file
@@ -0,0 +1,63 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import Frame, TranscriptionFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.gladia import GladiaSTTService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class TranscriptionLogger(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, TranscriptionFrame):
|
||||
print(f"Transcription: {frame.text}")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Transcription bot", DailyParams(audio_in_enabled=True)
|
||||
)
|
||||
|
||||
stt = GladiaSTTService(
|
||||
api_key=os.getenv("GLADIA_API_KEY"),
|
||||
# live_options=LiveOptions(language=Language.FR),
|
||||
)
|
||||
|
||||
tl = TranscriptionLogger()
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
62
examples/foundational/13d-assemblyai-transcription.py
Normal file
62
examples/foundational/13d-assemblyai-transcription.py
Normal file
@@ -0,0 +1,62 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import Frame, TranscriptionFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.assemblyai import AssemblyAISTTService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class TranscriptionLogger(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, TranscriptionFrame):
|
||||
print(f"Transcription: {frame.text}")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Transcription bot", DailyParams(audio_in_enabled=True)
|
||||
)
|
||||
|
||||
stt = AssemblyAISTTService(
|
||||
api_key=os.getenv("ASSEMBLYAI_API_KEY"),
|
||||
)
|
||||
|
||||
tl = TranscriptionLogger()
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -1,29 +1,26 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMContext, OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -127,7 +124,7 @@ async def main():
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
|
||||
@@ -1,28 +1,26 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -105,7 +103,7 @@ async def main():
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
|
||||
@@ -1,28 +1,26 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -67,7 +65,8 @@ async def main():
|
||||
|
||||
llm = AnthropicLLMService(
|
||||
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
||||
model="claude-3-5-sonnet-20240620",
|
||||
# model="claude-3-5-sonnet-20240620",
|
||||
model="claude-3-5-sonnet-latest",
|
||||
enable_prompt_caching_beta=True,
|
||||
)
|
||||
llm.register_function("get_weather", get_weather)
|
||||
@@ -160,8 +159,8 @@ If you need to use a tool, simply use the tool. Do not tell the user the tool yo
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
global video_participant_id
|
||||
video_participant_id = participant["id"]
|
||||
transport.capture_participant_transcription(video_participant_id)
|
||||
transport.capture_participant_video(video_participant_id, framerate=0)
|
||||
await transport.capture_participant_transcription(video_participant_id)
|
||||
await transport.capture_participant_video(video_participant_id, framerate=0)
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user