Compare commits
582 Commits
fix/event-
...
fix/self-r
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b873198a59 | ||
|
|
5b696bd4ae | ||
|
|
b67af19d47 | ||
|
|
6d9c07b945 | ||
|
|
18429f80f1 | ||
|
|
0a54dc9721 | ||
|
|
521f669051 | ||
|
|
abb20f34ba | ||
|
|
b1e72ad4b7 | ||
|
|
f610fb95f9 | ||
|
|
827032fefb | ||
|
|
af4ef95dc6 | ||
|
|
0370bb15e4 | ||
|
|
2b3595485f | ||
|
|
63c664becb | ||
|
|
fecf462139 | ||
|
|
023063759a | ||
|
|
c49eda98e7 | ||
|
|
5d07326e36 | ||
|
|
fa659311b6 | ||
|
|
125c423356 | ||
|
|
c9615c8db6 | ||
|
|
28c542f6ed | ||
|
|
5708c81b93 | ||
|
|
82ce3ea8de | ||
|
|
62ada92188 | ||
|
|
273692421f | ||
|
|
0a3e212f93 | ||
|
|
43d686c622 | ||
|
|
4d136e1e28 | ||
|
|
2024285c75 | ||
|
|
bc830c16f1 | ||
|
|
18630c9478 | ||
|
|
3a8d3cc841 | ||
|
|
2963c7589d | ||
|
|
63caa403cb | ||
|
|
846cf0794d | ||
|
|
498349c17e | ||
|
|
474b27305f | ||
|
|
20509e8f96 | ||
|
|
5b2fa69bdc | ||
|
|
4f8cacc769 | ||
|
|
0145fb4ea0 | ||
|
|
8e52df7f03 | ||
|
|
8ee99e37ff | ||
|
|
bae4211369 | ||
|
|
859cd7c920 | ||
|
|
d608c400f9 | ||
|
|
94e93bed83 | ||
|
|
b1cee140b9 | ||
|
|
352361bdd2 | ||
|
|
baa61468a1 | ||
|
|
7501ba2e45 | ||
|
|
200716e8fe | ||
|
|
50ef4909e3 | ||
|
|
63df4642b5 | ||
|
|
43869a499d | ||
|
|
d2bf3952ec | ||
|
|
92c380ee77 | ||
|
|
a55ba40921 | ||
|
|
fb1bfd03dd | ||
|
|
a0a7b3101d | ||
|
|
39dc4ba99c | ||
|
|
a5b5a8e5cf | ||
|
|
1daea78b91 | ||
|
|
6066eec853 | ||
|
|
cd379671aa | ||
|
|
8006223911 | ||
|
|
247f0bbcd3 | ||
|
|
3537420d91 | ||
|
|
65fb88e61e | ||
|
|
b345f48ac1 | ||
|
|
f181e12d8f | ||
|
|
36de6003d0 | ||
|
|
dba4de77bf | ||
|
|
507765625f | ||
|
|
8f5e5e8e7c | ||
|
|
c682a44bb6 | ||
|
|
cb7023681f | ||
|
|
012ef41ff4 | ||
|
|
f6bb5fa124 | ||
|
|
2489c76bc6 | ||
|
|
73cb96bf66 | ||
|
|
79ec61d1d8 | ||
|
|
ca440594fe | ||
|
|
6c25dd4aa2 | ||
|
|
09bb6bb03b | ||
|
|
746fdfbfef | ||
|
|
f7af9f1efd | ||
|
|
a5f95acaf5 | ||
|
|
e50b138ab2 | ||
|
|
3640c7a2dd | ||
|
|
2454bedf29 | ||
|
|
3adb2f50a6 | ||
|
|
01b7a93e08 | ||
|
|
347eaf582d | ||
|
|
25ca296477 | ||
|
|
3fce88555f | ||
|
|
9e6f27c9f1 | ||
|
|
94f01af545 | ||
|
|
432870cc36 | ||
|
|
e065907745 | ||
|
|
b7a5ca3d1e | ||
|
|
9569625f03 | ||
|
|
18afe37bd1 | ||
|
|
2b9777b812 | ||
|
|
8866ab1585 | ||
|
|
f0995164d9 | ||
|
|
136732afae | ||
|
|
3410eb82b3 | ||
|
|
794811fbdb | ||
|
|
abea22ec57 | ||
|
|
08beb0264a | ||
|
|
2e15b4842c | ||
|
|
6d95a2425c | ||
|
|
4667a3d66d | ||
|
|
0bf2477d2c | ||
|
|
71a752c971 | ||
|
|
358f237507 | ||
|
|
d99a256715 | ||
|
|
dcbcab1542 | ||
|
|
a966947220 | ||
|
|
16b060d9e9 | ||
|
|
ed7fde324e | ||
|
|
beb4e86b5f | ||
|
|
e75ccd9c2f | ||
|
|
a80919ceff | ||
|
|
1fe4538982 | ||
|
|
9a48d93bd2 | ||
|
|
0c3e59ed61 | ||
|
|
ec2b38dc29 | ||
|
|
2036757b84 | ||
|
|
0574167fbd | ||
|
|
972ad93e18 | ||
|
|
ac53594967 | ||
|
|
b063d9d43b | ||
|
|
48e93beadf | ||
|
|
640940a41a | ||
|
|
f1e2001a4e | ||
|
|
12dc6c0b9e | ||
|
|
93f4402198 | ||
|
|
f3eb5b30a0 | ||
|
|
18aad05a7c | ||
|
|
883b24f577 | ||
|
|
17ab9c425f | ||
|
|
2f5e61ac55 | ||
|
|
1128c5b7fb | ||
|
|
a9a5edd8ca | ||
|
|
a98c884e31 | ||
|
|
2475697955 | ||
|
|
ba242d4875 | ||
|
|
5deb80932b | ||
|
|
4a00e6829f | ||
|
|
9d89afa7d4 | ||
|
|
92b6ecd945 | ||
|
|
314d074c61 | ||
|
|
9c627e7292 | ||
|
|
ad179b0852 | ||
|
|
5128089d42 | ||
|
|
87a79df048 | ||
|
|
24f90715e3 | ||
|
|
e00b98343e | ||
|
|
ad1bec4583 | ||
|
|
a47d7f98ee | ||
|
|
19cd242261 | ||
|
|
9bb712a47b | ||
|
|
1dccbe7c0b | ||
|
|
2dd3e2f1e7 | ||
|
|
f206aaa28d | ||
|
|
60e42f5690 | ||
|
|
88e981c013 | ||
|
|
7bd8dfe898 | ||
|
|
83039a1a35 | ||
|
|
28e8b61eb4 | ||
|
|
d47d95e1f0 | ||
|
|
79b9d929c5 | ||
|
|
dfc0856d54 | ||
|
|
f3c1cd4cd6 | ||
|
|
18d91d6df3 | ||
|
|
688f502488 | ||
|
|
7684a94c33 | ||
|
|
e27f4bccfb | ||
|
|
fa8b0aeda8 | ||
|
|
946f0f4e77 | ||
|
|
b9cf3f3225 | ||
|
|
d32c4b2f5f | ||
|
|
77a5d16a10 | ||
|
|
ca224834b2 | ||
|
|
3867bc6302 | ||
|
|
83a8379401 | ||
|
|
f2688deb0d | ||
|
|
981253c703 | ||
|
|
aa6c9797ca | ||
|
|
6305e04569 | ||
|
|
3ff9b7b5ad | ||
|
|
cc797ba3cf | ||
|
|
91c8122c17 | ||
|
|
944ac92593 | ||
|
|
ca0d2e68c3 | ||
|
|
631463e573 | ||
|
|
6a553367a2 | ||
|
|
00ec6c77ea | ||
|
|
ee6520db30 | ||
|
|
2a572aedba | ||
|
|
5e66702cf5 | ||
|
|
34b068d657 | ||
|
|
05e2a013b3 | ||
|
|
5f64dae0cf | ||
|
|
1bf8b54502 | ||
|
|
ed3ec045aa | ||
|
|
67d39a97f7 | ||
|
|
947ff03c9f | ||
|
|
a4e187e138 | ||
|
|
9f380170d7 | ||
|
|
12f27f9cda | ||
|
|
104d06551a | ||
|
|
90ad2a4e81 | ||
|
|
3494a94cac | ||
|
|
570f2d7fc0 | ||
|
|
f3d99adf8f | ||
|
|
d34f416281 | ||
|
|
5a1deb7cb4 | ||
|
|
a5fc2b1650 | ||
|
|
5cb8d91431 | ||
|
|
ce690848c0 | ||
|
|
30f51edfcd | ||
|
|
cd03d449cb | ||
|
|
57df03aade | ||
|
|
4945cfbd8f | ||
|
|
8d37d3bae7 | ||
|
|
d7b1624d3c | ||
|
|
7f65204c3b | ||
|
|
97eff414c3 | ||
|
|
5b67e76de7 | ||
|
|
b9e79bd06a | ||
|
|
d5105a78e6 | ||
|
|
a352b2d7a0 | ||
|
|
2345090b10 | ||
|
|
af562bf9a8 | ||
|
|
d4993f0dcf | ||
|
|
1790a84bfd | ||
|
|
29c53b99a4 | ||
|
|
aa5a855eab | ||
|
|
e66d6f8ffe | ||
|
|
b8ac2ba713 | ||
|
|
6eea40858e | ||
|
|
90700d10aa | ||
|
|
fa85f7bbc7 | ||
|
|
669f013970 | ||
|
|
76f63e54e2 | ||
|
|
cce5a13444 | ||
|
|
d11e1cd631 | ||
|
|
8b9da632d1 | ||
|
|
b36f7892a4 | ||
|
|
9b43cde128 | ||
|
|
6af4d872a8 | ||
|
|
22398e1410 | ||
|
|
d10467e043 | ||
|
|
cbe131636d | ||
|
|
fef9e3ea32 | ||
|
|
56d8ef2bf4 | ||
|
|
8791559351 | ||
|
|
f6c919354f | ||
|
|
93138466d6 | ||
|
|
5a5a98b497 | ||
|
|
2b4f507d37 | ||
|
|
d6f3a90662 | ||
|
|
8fb0e37965 | ||
|
|
0d45b48f7b | ||
|
|
6af4520b1f | ||
|
|
ba469e5645 | ||
|
|
bd12b60b5c | ||
|
|
54db37ea47 | ||
|
|
752e16f553 | ||
|
|
7c7408a048 | ||
|
|
8f42343927 | ||
|
|
46da6cd91b | ||
|
|
ecb02d9049 | ||
|
|
cc68e00125 | ||
|
|
e0e3b5250b | ||
|
|
55a3b10e70 | ||
|
|
e6b06414b3 | ||
|
|
6bcfb40d12 | ||
|
|
65b1a8ce36 | ||
|
|
2db3d94d06 | ||
|
|
2a26b9f7a3 | ||
|
|
4f77c532fb | ||
|
|
c3a4da4a29 | ||
|
|
84ca0b6d58 | ||
|
|
c1857d255d | ||
|
|
d50ec33079 | ||
|
|
40c84faff5 | ||
|
|
84cd9346f9 | ||
|
|
5d5b19e1d2 | ||
|
|
8d3e10f054 | ||
|
|
1665ce181a | ||
|
|
803a20cc00 | ||
|
|
90bead06ab | ||
|
|
b427d534ae | ||
|
|
b030f1178d | ||
|
|
a627597bca | ||
|
|
4c10ddb7bb | ||
|
|
a4e499dc80 | ||
|
|
ca49acfaa6 | ||
|
|
86147f15f3 | ||
|
|
5cda72d138 | ||
|
|
54e62a8177 | ||
|
|
a592b7fdf0 | ||
|
|
ba2b7c05d6 | ||
|
|
774041e9a1 | ||
|
|
763002f2bc | ||
|
|
50dedf350d | ||
|
|
d3ecbb11c1 | ||
|
|
f453227ba3 | ||
|
|
52cc64019a | ||
|
|
95689cc81c | ||
|
|
675c7c43e3 | ||
|
|
bfd19e867c | ||
|
|
acc9923c0a | ||
|
|
bdc9e7e2e4 | ||
|
|
a587e1b99a | ||
|
|
7853e5ca93 | ||
|
|
614b8e1a62 | ||
|
|
ef51c2a5c6 | ||
|
|
f42dc0d38e | ||
|
|
d87f3543c7 | ||
|
|
fee633cb92 | ||
|
|
607af91153 | ||
|
|
e779233918 | ||
|
|
604d5d0b14 | ||
|
|
342ae7af41 | ||
|
|
c92ec1552e | ||
|
|
93160f1455 | ||
|
|
e3158e1131 | ||
|
|
63a23246d5 | ||
|
|
569ea9849a | ||
|
|
a98ca9b65b | ||
|
|
c9310789dc | ||
|
|
b93e12d701 | ||
|
|
3f77da627d | ||
|
|
35d265770d | ||
|
|
9632efec8c | ||
|
|
27dbfa1eda | ||
|
|
183c0aa4ef | ||
|
|
a69a037ffa | ||
|
|
c46e7f5da0 | ||
|
|
307aeaeda0 | ||
|
|
305ab44132 | ||
|
|
b486f35c70 | ||
|
|
c92080b0d2 | ||
|
|
ddfedaf478 | ||
|
|
b1ad4d5ab0 | ||
|
|
0857aa87be | ||
|
|
fd3c5f69b7 | ||
|
|
72ab329513 | ||
|
|
7999d08b7e | ||
|
|
57821cf709 | ||
|
|
18045582a9 | ||
|
|
7be2b8cc34 | ||
|
|
671cc8eb74 | ||
|
|
b4dce656f0 | ||
|
|
253a1d1114 | ||
|
|
ca613bcb79 | ||
|
|
0423acd8a0 | ||
|
|
7eabaaa0ef | ||
|
|
bbb8b53d03 | ||
|
|
f3b72e9263 | ||
|
|
31c7fbc5ba | ||
|
|
6ab12626d6 | ||
|
|
b77a50de73 | ||
|
|
433c1b9b92 | ||
|
|
bd00587092 | ||
|
|
5a85e27cc5 | ||
|
|
11daa43b1b | ||
|
|
875614ff7a | ||
|
|
eb1bf1e446 | ||
|
|
7456a0a55f | ||
|
|
27277ed3d9 | ||
|
|
5543bc56f3 | ||
|
|
c8496dfb8e | ||
|
|
d3f4cbb620 | ||
|
|
c9f922c479 | ||
|
|
49bd3da26b | ||
|
|
f3ef488925 | ||
|
|
4f08098917 | ||
|
|
a7cd5b0322 | ||
|
|
55dadc9118 | ||
|
|
01bbf61e0d | ||
|
|
10fb77c0e2 | ||
|
|
2612fae527 | ||
|
|
c5be67f293 | ||
|
|
312caaba86 | ||
|
|
ff0eb6d286 | ||
|
|
ef6bbace98 | ||
|
|
06ec21387f | ||
|
|
bdae177125 | ||
|
|
468e159f9b | ||
|
|
a4acafd3be | ||
|
|
105824a372 | ||
|
|
55e0d4ecc4 | ||
|
|
9102e81cb8 | ||
|
|
d7d8e93a3d | ||
|
|
bf9b166464 | ||
|
|
e80e0eab29 | ||
|
|
61242e6575 | ||
|
|
8841387121 | ||
|
|
ee695ae9fe | ||
|
|
52012b0fb2 | ||
|
|
f7a1c6b719 | ||
|
|
6aa77ccc13 | ||
|
|
45b7ec4e2c | ||
|
|
1c434c6ad5 | ||
|
|
4591affba9 | ||
|
|
91346f5f37 | ||
|
|
6a66ebe332 | ||
|
|
c1d4180042 | ||
|
|
81a53c699c | ||
|
|
60168f7f69 | ||
|
|
23d7608e5f | ||
|
|
99242c0a93 | ||
|
|
3a71865cf4 | ||
|
|
ecf2e69f3f | ||
|
|
febd52274d | ||
|
|
1542d922e7 | ||
|
|
15d5d1159e | ||
|
|
884630a6bd | ||
|
|
1cf137c6a8 | ||
|
|
98fcfd7c91 | ||
|
|
2f23f2e39c | ||
|
|
9c6b11cecf | ||
|
|
fc1444c9d6 | ||
|
|
ea94939add | ||
|
|
0c69ae6371 | ||
|
|
8b88280bb1 | ||
|
|
960d0faea5 | ||
|
|
b9390ccb1b | ||
|
|
061a0dc43d | ||
|
|
328bbe069f | ||
|
|
dc32ecc872 | ||
|
|
ca2eb1904f | ||
|
|
4bce58f270 | ||
|
|
7572d63f8f | ||
|
|
3c463c9416 | ||
|
|
bd618d64e3 | ||
|
|
a824660df7 | ||
|
|
58b9019852 | ||
|
|
afcdef8c81 | ||
|
|
bd92104fb3 | ||
|
|
34e9f224a8 | ||
|
|
dca7f3b5b0 | ||
|
|
70a85cd192 | ||
|
|
91e86658b7 | ||
|
|
0a8588669c | ||
|
|
0e99400148 | ||
|
|
648f20db6d | ||
|
|
09b5b6b12d | ||
|
|
0e6a423955 | ||
|
|
dc8972cd94 | ||
|
|
e4e2231958 | ||
|
|
18b3ee743b | ||
|
|
65b8e0e89c | ||
|
|
b77f8b065f | ||
|
|
5fd43faec3 | ||
|
|
abebcf37bd | ||
|
|
ca4e3c79f9 | ||
|
|
e8d1bec03b | ||
|
|
f0cc54589e | ||
|
|
22b9aac2ff | ||
|
|
7f86f4ac27 | ||
|
|
dcab79753b | ||
|
|
bdded9b026 | ||
|
|
1e1e275fea | ||
|
|
effb6aa8f4 | ||
|
|
a4a9bae79e | ||
|
|
c943ef9261 | ||
|
|
f05809520b | ||
|
|
ec17dc6626 | ||
|
|
4e85e81d9b | ||
|
|
a1cc88a233 | ||
|
|
61a230ec53 | ||
|
|
a13380b574 | ||
|
|
2a927189d9 | ||
|
|
a90c15362c | ||
|
|
d3bdd2d246 | ||
|
|
465ae4f706 | ||
|
|
a0d801b658 | ||
|
|
35919a84e3 | ||
|
|
f94a60f381 | ||
|
|
a446bca72d | ||
|
|
8ae834366b | ||
|
|
a4acc12f91 | ||
|
|
e93112e76e | ||
|
|
680bcaac66 | ||
|
|
d2ac9006a2 | ||
|
|
bcb019e8ab | ||
|
|
4ea546785f | ||
|
|
f128cdd19a | ||
|
|
7921bce4af | ||
|
|
cadced3f79 | ||
|
|
8951442b8e | ||
|
|
7e6e3031e7 | ||
|
|
3b3c7aa8cc | ||
|
|
308829f92b | ||
|
|
82a799e63e | ||
|
|
6b5bcae86f | ||
|
|
836073849c | ||
|
|
b13b65d6e2 | ||
|
|
3d545b718d | ||
|
|
f2fa5d9733 | ||
|
|
76b774072c | ||
|
|
b6341ffaa5 | ||
|
|
29fae67c9e | ||
|
|
718ea1c15e | ||
|
|
8e09d94614 | ||
|
|
de73e28563 | ||
|
|
55250b4f7e | ||
|
|
281145a991 | ||
|
|
7bd32e2fe5 | ||
|
|
8f05d95f50 | ||
|
|
87c12f3098 | ||
|
|
9c0bf89247 | ||
|
|
6e44a2ab49 | ||
|
|
7aa7b86aed | ||
|
|
5ad9faeb4c | ||
|
|
9e8f8b45c6 | ||
|
|
0ee11ad333 | ||
|
|
124a3c35af | ||
|
|
054e504868 | ||
|
|
e85a00cc0e | ||
|
|
cc61cdbba3 | ||
|
|
62f4708d43 | ||
|
|
ba0ddb1832 | ||
|
|
eacd2a4b71 | ||
|
|
7ed110650d | ||
|
|
4a724379fc | ||
|
|
768d3958dd | ||
|
|
5f9ff8bd58 | ||
|
|
59ed422052 | ||
|
|
7e0ca113af | ||
|
|
13c52e0e6d | ||
|
|
a787fd9cd8 | ||
|
|
14495c425a | ||
|
|
461bd0a2e0 | ||
|
|
bd45ce2b4e | ||
|
|
a266644b06 | ||
|
|
03faadd7f9 | ||
|
|
bf43032652 | ||
|
|
fa6f924b31 | ||
|
|
a010a020fd | ||
|
|
655006aff5 | ||
|
|
671dc8cd9b | ||
|
|
9a718ded1e | ||
|
|
024809b39a | ||
|
|
6cf0d53d00 | ||
|
|
778dacc9a8 | ||
|
|
06b3ecd2d6 | ||
|
|
b4d143e39b | ||
|
|
c89083e72e | ||
|
|
1ac811ab32 | ||
|
|
f6359d460e | ||
|
|
f03a7175c7 | ||
|
|
aed44c863a | ||
|
|
fa5da3b0be | ||
|
|
7e82a0cf49 | ||
|
|
cddd6d5b0a | ||
|
|
11cf891ac8 | ||
|
|
c89ae717fe | ||
|
|
562bdd3084 | ||
|
|
cc4c3650e1 | ||
|
|
dfc1f09b77 | ||
|
|
0b1a4792b8 | ||
|
|
14bd3b1b32 | ||
|
|
f733e77496 | ||
|
|
5fc46cc450 | ||
|
|
4a9eb82f92 | ||
|
|
990d8386e4 | ||
|
|
ce7d823770 | ||
|
|
0b93c3f900 | ||
|
|
6fa797c8e4 | ||
|
|
38506f51f7 | ||
|
|
1ceb01665f | ||
|
|
8dfc59be13 |
5
.claude/settings.json
Normal file
5
.claude/settings.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"attribution": {
|
||||
"commit": ""
|
||||
}
|
||||
}
|
||||
47
.claude/skills/changelog/SKILL.md
Normal file
47
.claude/skills/changelog/SKILL.md
Normal file
@@ -0,0 +1,47 @@
|
||||
---
|
||||
name: changelog
|
||||
description: Create changelog files for important commits in a PR
|
||||
---
|
||||
|
||||
Create changelog files for the important commits in this PR. The PR number is provided as an argument.
|
||||
|
||||
## Instructions
|
||||
|
||||
1. Skip changelog for: documentation-only, internal refactoring, test-only, CI changes.
|
||||
|
||||
2. First, check what commits are on the current branch compared to main:
|
||||
```
|
||||
git log main..HEAD --oneline
|
||||
```
|
||||
|
||||
3. For each significant change, create a changelog file in the `changelog/` folder using the format:
|
||||
Allowed types: `added`, `changed`, `deprecated`, `removed`, `fixed`, `security`, `performance`, `other`
|
||||
- `{PR_NUMBER}.added.md` - for new features
|
||||
- `{PR_NUMBER}.added.2.md`, `{PR_NUMBER}.added.3.md` - for additional entries of the same type
|
||||
- `{PR_NUMBER}.changed.md` - for changes to existing functionality
|
||||
- `{PR_NUMBER}.fixed.md` - for bug fixes
|
||||
- `{PR_NUMBER}.deprecated.md` - for deprecations
|
||||
- `{PR_NUMBER}.removed.md` - for removed features
|
||||
- `{PR_NUMBER}.security.md` - for security fixes
|
||||
- `{PR_NUMBER}.performance.md` - for performance improvements
|
||||
- `{PR_NUMBER}.other.md` - for other changes
|
||||
|
||||
4. Each changelog file should at least contain a main single line starting with `- ` followed by a clear description of the change. No line wrapping.
|
||||
|
||||
5. If the change is complicated, changelog files can have indented lines after the main line with additional details or code samples.
|
||||
|
||||
6. Use ⚠️ emoji prefix for breaking changes.
|
||||
|
||||
## Example
|
||||
|
||||
For PR #3519 with a new feature and a bug fix:
|
||||
|
||||
`changelog/3519.added.md`:
|
||||
```
|
||||
- Added `SomeNewFeature` for doing something useful.
|
||||
```
|
||||
|
||||
`changelog/3519.fixed.md`:
|
||||
```
|
||||
- Fixed an issue where something was not working correctly.
|
||||
```
|
||||
307
.claude/skills/cleanup/SKILL.md
Normal file
307
.claude/skills/cleanup/SKILL.md
Normal file
@@ -0,0 +1,307 @@
|
||||
# Code Cleanup Skill
|
||||
|
||||
The **Code Cleanup Skill** reviews, refactors, and documents code changes in your current branch, ensuring alignment with **Pipecat’s architecture, coding standards, and example patterns**.
|
||||
It focuses on **readability, correctness, performance, and consistency**, while avoiding breaking changes.
|
||||
|
||||
---
|
||||
|
||||
## Skill Overview
|
||||
|
||||
This skill analyzes all changes introduced in your branch and performs the following actions:
|
||||
|
||||
1. **Analyze Branch Changes**
|
||||
- Review uncommitted changes and outgoing commits
|
||||
2. **Refactor for Readability**
|
||||
- Improve clarity, naming, structure, and modern Python usage
|
||||
3. **Enhance Performance**
|
||||
- Identify safe, conservative optimization opportunities
|
||||
4. **Add Documentation**
|
||||
- Apply Pipecat-style, Google-format docstrings
|
||||
5. **Ensure Pattern Consistency**
|
||||
- Match existing Pipecat services, pipelines, and examples
|
||||
6. **Validate Examples**
|
||||
- Ensure examples follow foundational patterns (e.g. `07-interruptible.py`)
|
||||
|
||||
---
|
||||
|
||||
## Usage
|
||||
|
||||
Invoke the skill using any of the following commands:
|
||||
|
||||
- “Clean up my branch code”
|
||||
- “Refactor the changes in my branch”
|
||||
- “Review and improve my branch code”
|
||||
- `/cleanup`
|
||||
|
||||
---
|
||||
|
||||
## What This Skill Does
|
||||
|
||||
### 1. Analyze Branch Changes
|
||||
|
||||
The skill retrieves all uncommitted changes and outgoing commits to understand:
|
||||
|
||||
- New files added
|
||||
- Modified files
|
||||
- Code additions and deletions
|
||||
- Overall scope and intent of changes
|
||||
|
||||
---
|
||||
|
||||
### 2. Code Refactoring
|
||||
|
||||
#### Readability Improvements
|
||||
|
||||
- Replace tuples with named classes or dataclasses
|
||||
- Improve variable, method, and class naming
|
||||
- Extract complex logic into well-named helper methods
|
||||
- Add missing type hints
|
||||
- Simplify nested or complex conditionals
|
||||
- Replace deprecated methods and features
|
||||
- Normalize formatting to match Pipecat style
|
||||
|
||||
#### Performance Enhancements
|
||||
|
||||
- Identify inefficient loops or repeated work
|
||||
- Suggest appropriate data structures
|
||||
- Optimize async workflows and I/O
|
||||
- Remove redundant operations
|
||||
|
||||
> Performance changes are conservative and non-breaking.
|
||||
|
||||
---
|
||||
|
||||
### 3. Documentation
|
||||
|
||||
Documentation follows **Google-style docstrings**, consistent with Pipecat conventions.
|
||||
|
||||
#### Class Documentation
|
||||
|
||||
```python
|
||||
class ExampleService:
|
||||
"""Brief one-line description.
|
||||
|
||||
Detailed explanation of the class purpose, responsibilities,
|
||||
and important behaviors.
|
||||
|
||||
Supported features:
|
||||
|
||||
- Feature 1
|
||||
- Feature 2
|
||||
- Feature 3
|
||||
"""
|
||||
```
|
||||
|
||||
#### Method Documentation
|
||||
|
||||
```python
|
||||
def process_data(self, data: str, options: Optional[dict] = None) -> bool:
|
||||
"""Process incoming data with optional configuration.
|
||||
|
||||
Args:
|
||||
data: The input data to process.
|
||||
options: Optional configuration dictionary.
|
||||
|
||||
Returns:
|
||||
True if processing succeeded, False otherwise.
|
||||
|
||||
Raises:
|
||||
ValueError: If data is empty or invalid.
|
||||
"""
|
||||
```
|
||||
|
||||
#### Pydantic Model Parameters
|
||||
|
||||
```python
|
||||
class InputParams(BaseModel):
|
||||
"""Configuration parameters for the service.
|
||||
|
||||
Parameters:
|
||||
timeout: Request timeout in seconds.
|
||||
retry_count: Number of retry attempts.
|
||||
enable_logging: Whether to enable debug logging.
|
||||
"""
|
||||
|
||||
timeout: Optional[float] = None
|
||||
retry_count: int = 3
|
||||
enable_logging: bool = False
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4. Pattern Consistency Checks
|
||||
|
||||
#### Service Classes
|
||||
|
||||
- Correct inheritance (`TTSService`, `STTService`, `LLMService`)
|
||||
- Consistent constructor signatures
|
||||
- Frame emission patterns
|
||||
- Metrics support:
|
||||
- `can_generate_metrics()`
|
||||
- TTFB metrics
|
||||
- Usage metrics
|
||||
- Alignment with similar existing services
|
||||
|
||||
#### Examples
|
||||
|
||||
Validated against `examples/foundational/07-interruptible.py`:
|
||||
|
||||
- Proper `create_transport()` usage
|
||||
- Correct pipeline structure
|
||||
- Task setup and observers
|
||||
- Event handler registration
|
||||
- Runner and bot entrypoint consistency
|
||||
|
||||
---
|
||||
|
||||
### 5. Specific Implementation Patterns
|
||||
|
||||
#### Service Implementation
|
||||
|
||||
```python
|
||||
class ExampleTTSService(TTSService):
|
||||
|
||||
def __init__(self, *, api_key: Optional[str] = None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self._api_key = api_key or os.getenv("SERVICE_API_KEY")
|
||||
|
||||
def can_generate_metrics(self) -> bool:
|
||||
return True
|
||||
|
||||
async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
|
||||
try:
|
||||
await self.start_ttfb_metrics()
|
||||
yield TTSStartedFrame()
|
||||
# ... processing ...
|
||||
yield TTSAudioRawFrame(...)
|
||||
finally:
|
||||
await self.stop_ttfb_metrics()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
#### Example Structure Pattern
|
||||
|
||||
```python
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(...),
|
||||
"twilio": lambda: FastAPIWebsocketParams(...),
|
||||
"webrtc": lambda: TransportParams(...),
|
||||
}
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
stt = DeepgramSTTService(...)
|
||||
tts = SomeTTSService(...)
|
||||
llm = OpenAILLMService(...)
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(...)
|
||||
|
||||
pipeline = Pipeline([...])
|
||||
task = PipelineTask(pipeline, params=..., observers=[...])
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
await runner.run(task)
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Execution Flow
|
||||
|
||||
1. Fetch uncommitted and outgoing changes
|
||||
2. Categorize files (services, examples, tests, utilities)
|
||||
3. Analyze each file:
|
||||
- Readability
|
||||
- Performance
|
||||
- Documentation
|
||||
- Pattern consistency
|
||||
4. Generate actionable recommendations
|
||||
5. Apply Pipecat standards
|
||||
|
||||
---
|
||||
|
||||
## Examples
|
||||
|
||||
### Before: Tuple Usage
|
||||
|
||||
```python
|
||||
def get_audio_info(self) -> Tuple[int, int]:
|
||||
return (48000, 1)
|
||||
```
|
||||
|
||||
### After: Named Class
|
||||
|
||||
```python
|
||||
class AudioInfo:
|
||||
"""Audio configuration information.
|
||||
|
||||
Parameters:
|
||||
sample_rate: Sample rate in Hz.
|
||||
num_channels: Number of audio channels.
|
||||
"""
|
||||
|
||||
sample_rate: int
|
||||
num_channels: int
|
||||
|
||||
def get_audio_info(self) -> AudioInfo:
|
||||
return AudioInfo(sample_rate=48000, num_channels=1)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Before: Missing Documentation
|
||||
|
||||
```python
|
||||
class NewTTSService(TTSService):
|
||||
def __init__(self, api_key: str, voice: str):
|
||||
self._api_key = api_key
|
||||
self._voice = voice
|
||||
```
|
||||
|
||||
### After: Fully Documented
|
||||
|
||||
```python
|
||||
class NewTTSService(TTSService):
|
||||
"""Text-to-speech service using NewProvider API.
|
||||
|
||||
Streams PCM audio and emits TTSAudioRawFrame frames compatible
|
||||
with Pipecat transports.
|
||||
|
||||
Supported features:
|
||||
- Text-to-speech synthesis
|
||||
- Streaming PCM audio
|
||||
- Voice customization
|
||||
- TTFB metrics
|
||||
"""
|
||||
|
||||
def __init__(self, *, api_key: str, voice: str, **kwargs):
|
||||
"""Initialize the NewTTSService.
|
||||
|
||||
Args:
|
||||
api_key: API key for authentication.
|
||||
voice: Voice identifier to use.
|
||||
**kwargs: Additional arguments passed to the parent service.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
self._api_key = api_key
|
||||
self.set_voice(voice)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Notes
|
||||
|
||||
- Non-breaking improvements only
|
||||
- Backward compatibility preserved
|
||||
- Conservative performance changes
|
||||
- Google-style docstrings
|
||||
- Pattern checks follow recent Pipecat code
|
||||
107
.claude/skills/code-review/SKILL.md
Normal file
107
.claude/skills/code-review/SKILL.md
Normal file
@@ -0,0 +1,107 @@
|
||||
---
|
||||
name: code-review
|
||||
description: Automated code review for pull requests using multiple specialized agents
|
||||
disable-model-invocation: true
|
||||
allowed-tools: Bash(gh issue view:*), Bash(gh search:*), Bash(gh issue list:*), Bash(gh pr comment:*), Bash(gh pr diff:*), Bash(gh pr view:*), Bash(gh pr list:*)
|
||||
---
|
||||
|
||||
Provide a code review for the given pull request.
|
||||
|
||||
**Agent assumptions (applies to all agents and subagents):**
|
||||
|
||||
- All tools are functional and will work without error. Do not test tools or make exploratory calls. Make sure this is clear to every subagent that is launched.
|
||||
- Only call a tool if it is required to complete the task. Every tool call should have a clear purpose.
|
||||
|
||||
To do this, follow these steps precisely:
|
||||
|
||||
1. Launch a haiku agent to check if any of the following are true:
|
||||
- The pull request is closed
|
||||
- The pull request is a draft
|
||||
- The pull request does not need code review (e.g. automated PR, trivial change that is obviously correct)
|
||||
- Claude has already commented on this PR (check `gh pr view <PR> --comments` for comments left by claude)
|
||||
|
||||
If any condition is true, stop and do not proceed.
|
||||
|
||||
Note: Still review Claude generated PR's.
|
||||
|
||||
2. Launch a haiku agent to return a list of file paths (not their contents) for all relevant CLAUDE.md files including:
|
||||
- The root CLAUDE.md file, if it exists
|
||||
- Any CLAUDE.md files in directories containing files modified by the pull request
|
||||
|
||||
3. Launch a sonnet agent to view the pull request and return a summary of the changes
|
||||
|
||||
4. Launch 4 agents in parallel to independently review the changes. Each agent should return the list of issues, where each issue includes a description and the reason it was flagged (e.g. "CLAUDE.md adherence", "bug"). The agents should do the following:
|
||||
|
||||
Agents 1 + 2: CLAUDE.md compliance sonnet agents
|
||||
Audit changes for CLAUDE.md compliance in parallel. Note: When evaluating CLAUDE.md compliance for a file, you should only consider CLAUDE.md files that share a file path with the file or parents.
|
||||
|
||||
Agent 3: Opus bug agent (parallel subagent with agent 4)
|
||||
Scan for obvious bugs. Focus only on the diff itself without reading extra context. Flag only significant bugs; ignore nitpicks and likely false positives. Do not flag issues that you cannot validate without looking at context outside of the git diff.
|
||||
|
||||
Agent 4: Opus bug agent (parallel subagent with agent 3)
|
||||
Look for problems that exist in the introduced code. This could be security issues, incorrect logic, etc. Only look for issues that fall within the changed code.
|
||||
|
||||
**CRITICAL: We only want HIGH SIGNAL issues.** Flag issues where:
|
||||
- The code will fail to compile or parse (syntax errors, type errors, missing imports, unresolved references)
|
||||
- The code will definitely produce wrong results regardless of inputs (clear logic errors)
|
||||
- Clear, unambiguous CLAUDE.md violations where you can quote the exact rule being broken
|
||||
|
||||
Do NOT flag:
|
||||
- Code style or quality concerns
|
||||
- Potential issues that depend on specific inputs or state
|
||||
- Subjective suggestions or improvements
|
||||
|
||||
If you are not certain an issue is real, do not flag it. False positives erode trust and waste reviewer time.
|
||||
|
||||
In addition to the above, each subagent should be told the PR title and description. This will help provide context regarding the author's intent.
|
||||
|
||||
5. For each issue found in the previous step by agents 3 and 4, launch parallel subagents to validate the issue. These subagents should get the PR title and description along with a description of the issue. The agent's job is to review the issue to validate that the stated issue is truly an issue with high confidence. For example, if an issue such as "variable is not defined" was flagged, the subagent's job would be to validate that is actually true in the code. Another example would be CLAUDE.md issues. The agent should validate that the CLAUDE.md rule that was violated is scoped for this file and is actually violated. Use Opus subagents for bugs and logic issues, and sonnet agents for CLAUDE.md violations.
|
||||
|
||||
6. Filter out any issues that were not validated in step 5. This step will give us our list of high signal issues for our review.
|
||||
|
||||
7. If issues were found, skip to step 8 to post comments.
|
||||
|
||||
If NO issues were found, post a summary comment using `gh pr comment` (if `--comment` argument is provided):
|
||||
"No issues found. Checked for bugs and CLAUDE.md compliance."
|
||||
|
||||
8. Create a list of all comments that you plan on leaving. This is only for you to make sure you are comfortable with the comments. Do not post this list anywhere.
|
||||
|
||||
9. Post inline comments for each issue using `gh pr review` with inline comments. For each comment:
|
||||
- Provide a brief description of the issue
|
||||
- For small, self-contained fixes, include a committable suggestion block
|
||||
- For larger fixes (6+ lines, structural changes, or changes spanning multiple locations), describe the issue and suggested fix without a suggestion block
|
||||
- Never post a committable suggestion UNLESS committing the suggestion fixes the issue entirely. If follow up steps are required, do not leave a committable suggestion.
|
||||
|
||||
**IMPORTANT: Only post ONE comment per unique issue. Do not post duplicate comments.**
|
||||
|
||||
Use this list when evaluating issues in Steps 4 and 5 (these are false positives, do NOT flag):
|
||||
|
||||
- Pre-existing issues
|
||||
- Something that appears to be a bug but is actually correct
|
||||
- Pedantic nitpicks that a senior engineer would not flag
|
||||
- Issues that a linter will catch (do not run the linter to verify)
|
||||
- General code quality concerns (e.g., lack of test coverage, general security issues) unless explicitly required in CLAUDE.md
|
||||
- Issues mentioned in CLAUDE.md but explicitly silenced in the code (e.g., via a lint ignore comment)
|
||||
|
||||
Notes:
|
||||
|
||||
- Use gh CLI to interact with GitHub (e.g., fetch pull requests, create comments). Do not use web fetch.
|
||||
- Create a todo list before starting.
|
||||
- You must cite and link each issue in inline comments (e.g., if referring to a CLAUDE.md, include a link to it).
|
||||
- If no issues are found, post a comment with the following format:
|
||||
|
||||
---
|
||||
|
||||
## Code review
|
||||
|
||||
No issues found. Checked for bugs and CLAUDE.md compliance.
|
||||
|
||||
---
|
||||
|
||||
- When linking to code in inline comments, follow the following format precisely, otherwise the Markdown preview won't render correctly: `https://github.com/OWNER/REPO/blob/FULL_SHA/path/to/file.py#L10-L15`
|
||||
- Requires full git sha
|
||||
- You must provide the full sha. Commands like `https://github.com/owner/repo/blob/$(git rev-parse HEAD)/foo/bar` will not work, since your comment will be directly rendered in Markdown.
|
||||
- Repo name must match the repo you're code reviewing
|
||||
- # sign after the file name
|
||||
- Line range format is L[start]-L[end]
|
||||
- Provide at least 1 line of context before and after, centered on the line you are commenting about (eg. if you are commenting about lines 5-6, you should link to `L4-7`)
|
||||
257
.claude/skills/docstring/SKILL.md
Normal file
257
.claude/skills/docstring/SKILL.md
Normal file
@@ -0,0 +1,257 @@
|
||||
---
|
||||
name: docstring
|
||||
description: Document a Python module and its classes using Google style
|
||||
---
|
||||
|
||||
Document a Python module and its classes using Google-style docstrings following project conventions. The class name is provided as an argument.
|
||||
|
||||
## Instructions
|
||||
|
||||
1. First, find the class in the codebase:
|
||||
```
|
||||
Search for "class ClassName" in src/pipecat/
|
||||
```
|
||||
|
||||
2. If multiple files contain that class name:
|
||||
- List all matches with their file paths
|
||||
- Ask the user which one they want to document
|
||||
- Wait for confirmation before proceeding
|
||||
|
||||
3. Once the file is identified, read the module to understand its structure:
|
||||
- Identify all classes, functions, and important type aliases
|
||||
- Understand the purpose of each component
|
||||
|
||||
4. Apply documentation in this order:
|
||||
- Module docstring (at top, after imports)
|
||||
- Class docstrings
|
||||
- `__init__` methods (always document constructor parameters)
|
||||
- Public methods (not starting with `_`)
|
||||
- Dataclass/config classes with field descriptions
|
||||
|
||||
5. Skip documentation for:
|
||||
- Private methods (starting with `_`)
|
||||
- Simple dunder methods (`__str__`, `__repr__`, `__post_init__`)
|
||||
- Very simple pass-through properties
|
||||
- **Already documented code** - If a class, method, or function already has a complete docstring that follows the project style, do not modify it. A docstring is complete if it has:
|
||||
- A one-line summary
|
||||
- Args section (if it has parameters)
|
||||
- Returns section (if it returns something meaningful)
|
||||
- Only add or improve documentation where it is missing or incomplete
|
||||
|
||||
## Module Docstring Format
|
||||
|
||||
```python
|
||||
"""[One-line description of module purpose].
|
||||
|
||||
[Optional: Longer explanation of functionality, key classes, or use cases.]
|
||||
"""
|
||||
```
|
||||
|
||||
Example:
|
||||
```python
|
||||
"""Neuphonic text-to-speech service implementations.
|
||||
|
||||
This module provides WebSocket and HTTP-based integrations with Neuphonic's
|
||||
text-to-speech API for real-time audio synthesis.
|
||||
"""
|
||||
```
|
||||
|
||||
## Class Docstring Format
|
||||
|
||||
```python
|
||||
class ClassName:
|
||||
"""One-line summary describing what the class does.
|
||||
|
||||
[Longer description explaining purpose, behavior, and key features.
|
||||
Use action-oriented language.]
|
||||
|
||||
[Optional: Event handlers, usage notes, or important caveats.]
|
||||
"""
|
||||
```
|
||||
|
||||
Example:
|
||||
```python
|
||||
class FrameProcessor(BaseObject):
|
||||
"""Base class for all frame processors in the pipeline.
|
||||
|
||||
Frame processors are the building blocks of Pipecat pipelines, they can be
|
||||
linked to form complex processing pipelines. They receive frames, process
|
||||
them, and pass them to the next or previous processor in the chain.
|
||||
|
||||
Event handlers available:
|
||||
|
||||
- on_before_process_frame: Called before a frame is processed
|
||||
- on_after_process_frame: Called after a frame is processed
|
||||
|
||||
Example::
|
||||
|
||||
@processor.event_handler("on_before_process_frame")
|
||||
async def on_before_process_frame(processor, frame):
|
||||
...
|
||||
|
||||
@processor.event_handler("on_after_process_frame")
|
||||
async def on_after_process_frame(processor, frame):
|
||||
...
|
||||
"""
|
||||
```
|
||||
|
||||
Note: When listing event handlers, do NOT use backticks. Include an `Example::` section (with double colon for Sphinx) showing the decorator pattern and function signature for each event.
|
||||
|
||||
## Constructor (`__init__`) Format
|
||||
|
||||
```python
|
||||
def __init__(self, *, param1: Type, param2: Type = default, **kwargs):
|
||||
"""Initialize the [ClassName].
|
||||
|
||||
Args:
|
||||
param1: Description of param1 and its purpose.
|
||||
param2: Description of param2. Defaults to [default].
|
||||
**kwargs: Additional arguments passed to parent class.
|
||||
"""
|
||||
```
|
||||
|
||||
Example:
|
||||
```python
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
api_key: str,
|
||||
voice_id: Optional[str] = None,
|
||||
sample_rate: Optional[int] = 22050,
|
||||
**kwargs,
|
||||
):
|
||||
"""Initialize the Neuphonic TTS service.
|
||||
|
||||
Args:
|
||||
api_key: Neuphonic API key for authentication.
|
||||
voice_id: ID of the voice to use for synthesis.
|
||||
sample_rate: Audio sample rate in Hz. Defaults to 22050.
|
||||
**kwargs: Additional arguments passed to parent InterruptibleTTSService.
|
||||
"""
|
||||
```
|
||||
|
||||
## Method Docstring Format
|
||||
|
||||
```python
|
||||
async def method_name(self, param1: Type) -> ReturnType:
|
||||
"""One-line summary of what method does.
|
||||
|
||||
[Longer description if behavior isn't obvious.]
|
||||
|
||||
Args:
|
||||
param1: Description of param1.
|
||||
|
||||
Returns:
|
||||
Description of return value.
|
||||
|
||||
Raises:
|
||||
ExceptionType: When this exception is raised.
|
||||
"""
|
||||
```
|
||||
|
||||
Example:
|
||||
```python
|
||||
async def put(self, item: Tuple[Frame, FrameDirection, FrameCallback]):
|
||||
"""Put an item into the priority queue.
|
||||
|
||||
System frames (`SystemFrame`) have higher priority than any other
|
||||
frames. If a non-frame item is provided it will have the highest priority.
|
||||
|
||||
Args:
|
||||
item: The item to enqueue.
|
||||
"""
|
||||
```
|
||||
|
||||
## Dataclass/Config Format
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class ConfigName:
|
||||
"""One-line description of configuration.
|
||||
|
||||
[Explanation of when/how to use this config.]
|
||||
|
||||
Parameters:
|
||||
field1: Description of field1.
|
||||
field2: Description of field2. Defaults to [default].
|
||||
"""
|
||||
|
||||
field1: Type
|
||||
field2: Type = default_value
|
||||
```
|
||||
|
||||
Example:
|
||||
```python
|
||||
@dataclass
|
||||
class FrameProcessorSetup:
|
||||
"""Configuration parameters for frame processor initialization.
|
||||
|
||||
Parameters:
|
||||
clock: The clock instance for timing operations.
|
||||
task_manager: The task manager for handling async operations.
|
||||
observer: Optional observer for monitoring frame processing events.
|
||||
"""
|
||||
|
||||
clock: BaseClock
|
||||
task_manager: BaseTaskManager
|
||||
observer: Optional[BaseObserver] = None
|
||||
```
|
||||
|
||||
## Enum Documentation Format
|
||||
|
||||
```python
|
||||
class EnumName(Enum):
|
||||
"""One-line description of the enum purpose.
|
||||
|
||||
[Longer description of how the enum is used.]
|
||||
|
||||
Parameters:
|
||||
VALUE1: Description of VALUE1.
|
||||
VALUE2: Description of VALUE2.
|
||||
"""
|
||||
|
||||
VALUE1 = 1
|
||||
VALUE2 = 2
|
||||
```
|
||||
|
||||
## Writing Style Guidelines
|
||||
|
||||
- **Concise and professional** - No casual language or filler words
|
||||
- **Action-oriented** - Start with verbs: "Processes...", "Manages...", "Converts..."
|
||||
- **Purpose before implementation** - Explain WHY before HOW
|
||||
- **Clear parameter descriptions** - Include type hints, defaults, and purpose
|
||||
- **No redundant type info** - Type hints are in the signature, don't repeat in description
|
||||
- **Use backticks for code references** - Wrap class names, method names, event names, parameter names, and code snippets in backticks
|
||||
|
||||
Good: "Neuphonic API key for authentication."
|
||||
Bad: "str: The API key (string) that is used for authenticating with Neuphonic."
|
||||
|
||||
Good: "Triggers `on_speech_started` when the `VADAnalyzer` detects speech."
|
||||
Bad: "Triggers on_speech_started when the VADAnalyzer detects speech."
|
||||
|
||||
## Deprecation Notice Format
|
||||
|
||||
When documenting deprecated code:
|
||||
|
||||
```python
|
||||
"""[Description].
|
||||
|
||||
.. deprecated:: X.X.X
|
||||
`ClassName` is deprecated and will be removed in a future version.
|
||||
Use `NewClassName` instead.
|
||||
"""
|
||||
```
|
||||
|
||||
## Checklist
|
||||
|
||||
Before finishing, verify:
|
||||
|
||||
- [ ] Module has a docstring at the top (after copyright header and imports)
|
||||
- [ ] All public classes have docstrings
|
||||
- [ ] All `__init__` methods document their parameters
|
||||
- [ ] All public methods have docstrings with Args/Returns/Raises as needed
|
||||
- [ ] Dataclasses use "Parameters:" section for field descriptions
|
||||
- [ ] Enums document each value in "Parameters:" section
|
||||
- [ ] Writing is concise and action-oriented
|
||||
- [ ] No documentation added to private methods (starting with `_`)
|
||||
- [ ] Existing complete docstrings were left unchanged
|
||||
128
.claude/skills/pr-description/SKILL.md
Normal file
128
.claude/skills/pr-description/SKILL.md
Normal file
@@ -0,0 +1,128 @@
|
||||
---
|
||||
name: pr-description
|
||||
description: Update a GitHub PR description with a summary of changes
|
||||
---
|
||||
|
||||
Update a GitHub pull request description based on the changes in the PR.
|
||||
|
||||
## Arguments
|
||||
|
||||
```
|
||||
/pr-description <PR_NUMBER> [--fixes <ISSUE_NUMBERS>]
|
||||
```
|
||||
|
||||
- `PR_NUMBER` (required): The pull request number to update
|
||||
- `--fixes` (optional): Comma-separated issue numbers that this PR fixes (e.g., `--fixes 123,456`)
|
||||
|
||||
Examples:
|
||||
- `/pr-description 3534`
|
||||
- `/pr-description 3534 --fixes 123`
|
||||
- `/pr-description 3534 --fixes 123,456,789`
|
||||
|
||||
## Instructions
|
||||
|
||||
1. First, gather information about the PR:
|
||||
- Use GitHub plugin to get PR details (title, current description, base branch)
|
||||
- Use local git to get commits: `git log main..HEAD --oneline`
|
||||
- Use local git to get the diff: `git diff main..HEAD`
|
||||
- Parse any `--fixes` argument for issue numbers
|
||||
|
||||
2. Check the existing PR description:
|
||||
- If it already has a complete, accurate description that reflects the changes, do nothing
|
||||
- If it's missing sections, incomplete, or outdated compared to the actual changes, proceed to update
|
||||
- If it only has the template placeholder text, generate a full description
|
||||
|
||||
3. Analyze the changes:
|
||||
- Understand the purpose of each commit
|
||||
- Identify any breaking changes (API changes, removed features, behavior changes)
|
||||
- Look for new features, bug fixes, refactoring, or documentation changes
|
||||
- Collect issue numbers from:
|
||||
- The `--fixes` argument (if provided)
|
||||
- Commit messages (patterns like "Fixes #123", "Closes #456", "Resolves #789")
|
||||
|
||||
4. Generate or update the PR description with these sections:
|
||||
|
||||
## PR Description Format
|
||||
|
||||
### Summary (always include)
|
||||
|
||||
Brief bullet points describing what changed and why. Focus on the *purpose* and *impact*, not implementation details.
|
||||
|
||||
```markdown
|
||||
## Summary
|
||||
|
||||
- Added X to enable Y
|
||||
- Fixed bug where Z would happen
|
||||
- Refactored W for better maintainability
|
||||
```
|
||||
|
||||
### Breaking Changes (include only if applicable)
|
||||
|
||||
Document any changes that affect existing users or APIs.
|
||||
|
||||
```markdown
|
||||
## Breaking Changes
|
||||
|
||||
- `ClassName.method()` now requires a `param` argument
|
||||
- Removed deprecated `old_function()` - use `new_function()` instead
|
||||
```
|
||||
|
||||
### Testing (include when non-obvious)
|
||||
|
||||
How to verify the changes work. Skip for trivial changes.
|
||||
|
||||
```markdown
|
||||
## Testing
|
||||
|
||||
- Run `uv run pytest tests/test_feature.py` to verify the fix
|
||||
- Example usage: `uv run examples/new_feature.py`
|
||||
```
|
||||
|
||||
### Fixes (include if issues are provided or found in commits)
|
||||
|
||||
List issues this PR fixes. GitHub will automatically close these issues when the PR is merged.
|
||||
|
||||
```markdown
|
||||
## Fixes
|
||||
|
||||
- Fixes #123
|
||||
- Fixes #456
|
||||
```
|
||||
|
||||
Note: Use "Fixes #X" format (not "Closes" or "Resolves") for consistency. Each issue should be on its own line with "Fixes" to ensure GitHub auto-closes them.
|
||||
|
||||
## Guidelines
|
||||
|
||||
- **Be concise** - Reviewers should understand the PR in 30 seconds
|
||||
- **Focus on why** - The diff shows *what* changed, explain *why*
|
||||
- **Skip empty sections** - Only include sections that have content
|
||||
- **Use bullet points** - Easier to scan than paragraphs
|
||||
- **Don't duplicate the diff** - Avoid listing every file or line changed
|
||||
|
||||
## Example Output
|
||||
|
||||
```markdown
|
||||
## Summary
|
||||
|
||||
- Added `/docstring` skill for documenting Python modules with Google-style docstrings
|
||||
- Skill finds classes by name and handles conflicts when multiple matches exist
|
||||
- Skips already-documented code to avoid unnecessary changes
|
||||
|
||||
## Testing
|
||||
|
||||
/docstring ClassName
|
||||
|
||||
## Fixes
|
||||
|
||||
- Fixes #123
|
||||
```
|
||||
|
||||
## Checklist
|
||||
|
||||
Before updating the PR:
|
||||
|
||||
- [ ] Verified existing description needs updating (not already complete)
|
||||
- [ ] Summary accurately reflects the changes
|
||||
- [ ] Breaking changes are clearly documented (if any)
|
||||
- [ ] No unnecessary sections included
|
||||
- [ ] Description is concise and scannable
|
||||
28
.claude/skills/pr-submit/SKILL.md
Normal file
28
.claude/skills/pr-submit/SKILL.md
Normal file
@@ -0,0 +1,28 @@
|
||||
---
|
||||
name: pr-submit
|
||||
description: Create and submit a GitHub PR from the current branch
|
||||
---
|
||||
|
||||
Submit the current changes as a GitHub pull request.
|
||||
|
||||
## Instructions
|
||||
|
||||
1. Check the current state of the repository:
|
||||
- Run `git status` to see staged, unstaged, and untracked changes
|
||||
- Run `git diff` to see current changes
|
||||
- Run `git log --oneline -10` to see recent commits
|
||||
|
||||
2. If there are uncommitted changes relevant to the PR:
|
||||
- Ask the user if they want a specific prefix for the branch name (e.g., `alice/`, `fix/`, `feat/`)
|
||||
- Create a new branch based on the current branch
|
||||
- Commit the changes using multiple commits if the changes are unrelated
|
||||
|
||||
3. Push the branch and create the PR:
|
||||
- Push with `-u` flag to set upstream tracking
|
||||
- Create the PR using `gh pr create`
|
||||
|
||||
4. After the PR is created:
|
||||
- Run `/changelog <pr_number>` to generate changelog files, then commit and push them
|
||||
- Run `/pr-description <pr_number>` to update the PR description
|
||||
|
||||
5. Return the PR URL to the user.
|
||||
250
.claude/skills/update-docs/SKILL.md
Normal file
250
.claude/skills/update-docs/SKILL.md
Normal file
@@ -0,0 +1,250 @@
|
||||
---
|
||||
name: update-docs
|
||||
description: Update documentation pages to match source code changes on the current branch
|
||||
---
|
||||
|
||||
Update documentation pages to reflect source code changes on the current branch. Analyzes the diff against main, maps changed source files to their corresponding doc pages, and makes targeted edits.
|
||||
|
||||
## Arguments
|
||||
|
||||
```
|
||||
/update-docs [DOCS_PATH]
|
||||
```
|
||||
|
||||
- `DOCS_PATH` (optional): Path to the docs repository root. If not provided, ask the user.
|
||||
|
||||
Examples:
|
||||
- `/update-docs /Users/me/src/docs`
|
||||
- `/update-docs`
|
||||
|
||||
## Instructions
|
||||
|
||||
### Step 1: Resolve docs path
|
||||
|
||||
If `DOCS_PATH` was provided as an argument, use it. Otherwise, ask the user for the path to their docs repository.
|
||||
|
||||
Verify the path exists and contains `server/services/` subdirectory.
|
||||
|
||||
### Step 2: Create docs branch
|
||||
|
||||
Get the current pipecat branch name:
|
||||
```bash
|
||||
git rev-parse --abbrev-ref HEAD
|
||||
```
|
||||
|
||||
In the docs repo, create a new branch off main with a matching name:
|
||||
```bash
|
||||
cd DOCS_PATH && git checkout main && git pull && git checkout -b {branch-name}-docs
|
||||
```
|
||||
|
||||
For example, if the pipecat branch is `feat/new-service`, the docs branch becomes `feat/new-service-docs`.
|
||||
|
||||
All doc edits in subsequent steps are made on this branch.
|
||||
|
||||
### Step 3: Detect changed source files
|
||||
|
||||
Run:
|
||||
```bash
|
||||
git diff main..HEAD --name-only
|
||||
```
|
||||
|
||||
Filter to files that could affect documentation:
|
||||
- `src/pipecat/services/**/*.py` (service implementations)
|
||||
- `src/pipecat/transports/**/*.py` (transport implementations)
|
||||
- `src/pipecat/serializers/**/*.py` (serializer implementations)
|
||||
- `src/pipecat/processors/**/*.py` (processor implementations)
|
||||
- `src/pipecat/audio/**/*.py` (audio utilities)
|
||||
- `src/pipecat/turns/**/*.py` (turn management)
|
||||
- `src/pipecat/observers/**/*.py` (observers)
|
||||
- `src/pipecat/pipeline/**/*.py` (pipeline core)
|
||||
|
||||
Ignore `__init__.py`, `__pycache__`, test files, and files that only contain type re-exports.
|
||||
|
||||
### Step 4: Map source files to doc pages
|
||||
|
||||
For each changed source file, find the corresponding doc page. Read the mapping file at `.claude/skills/update-docs/SOURCE_DOC_MAPPING.md` and apply its tiered lookup: tier 1 (known exceptions) → tier 2 (pattern matching) → tier 3 (search fallback). **First match wins.**
|
||||
|
||||
### Step 5: Analyze each source-doc pair
|
||||
|
||||
For each mapped pair:
|
||||
|
||||
1. **Read the full source file** to understand current state
|
||||
2. **Read the diff** for that file: `git diff main..HEAD -- <source_file>`
|
||||
3. **Read the current doc page** in full
|
||||
|
||||
Identify what changed by comparing source to docs:
|
||||
|
||||
- **Constructor parameters**: Compare `__init__` signature to the Configuration section's `<ParamField>` entries
|
||||
- **InputParams fields**: Compare `InputParams(BaseModel)` class fields to the InputParams table
|
||||
- **Event handlers**: Compare `_register_event_handler` calls and event handler definitions to Event Handlers section
|
||||
- **Class names / imports**: Check if Usage examples reference correct names
|
||||
- **Behavioral changes**: Check if Notes section needs updating
|
||||
|
||||
### Step 6: Make targeted edits
|
||||
|
||||
For each doc page that needs updates, edit **only the sections that need changes**. Preserve all other content exactly as-is.
|
||||
|
||||
#### Rules
|
||||
|
||||
- **Never remove content** unless the corresponding source code was removed
|
||||
- **Never rewrite sections** that are already accurate
|
||||
- **Match existing formatting** — if the page uses `<ParamField>` tags, use them; if it uses tables, use tables
|
||||
- **Keep descriptions concise** — match the tone and length of surrounding content
|
||||
- **Preserve CardGroup, links, and examples** unless they reference removed functionality
|
||||
- **Don't touch frontmatter** unless the class was renamed
|
||||
|
||||
#### Section-specific guidance
|
||||
|
||||
**Configuration** (constructor params):
|
||||
- Use `<ParamField path="name" type="type" default="value">` format if the page already uses it
|
||||
- Add new params in logical order (required first, then optional)
|
||||
- Remove params that no longer exist in source
|
||||
- Update types/defaults that changed
|
||||
|
||||
**InputParams** (runtime settings):
|
||||
- Use markdown table format: `| Parameter | Type | Default | Description |`
|
||||
- Match the field names and types from the `InputParams(BaseModel)` class
|
||||
- Include the default values from the source
|
||||
|
||||
**Usage** (code examples):
|
||||
- Update import paths, class names, and parameter names
|
||||
- Only modify examples if they would break or be misleading with the new API
|
||||
- Don't rewrite working examples just to add new optional params
|
||||
|
||||
**Notes**:
|
||||
- Add notes for new behavioral gotchas or breaking changes
|
||||
- Remove notes about limitations that were fixed
|
||||
- Keep existing notes that are still accurate
|
||||
|
||||
**Event Handlers**:
|
||||
- Update the event table and example code
|
||||
- Add new events, remove deleted ones
|
||||
- Update handler signatures if they changed
|
||||
|
||||
**Overview / Key Features / Prerequisites**:
|
||||
- Only update if the PR fundamentally changes what the service does (new capability, removed capability, renamed class)
|
||||
- Most PRs will NOT need changes to these sections
|
||||
|
||||
### Step 7: Update guides
|
||||
|
||||
Guides at `DOCS_PATH/guides/` reference specific class names, parameters, imports, and code patterns. After completing reference doc edits, check if any guides need updates too.
|
||||
|
||||
For each changed source file, collect the class names, renamed parameters, and changed imports from the diff. Search the guides directory:
|
||||
```bash
|
||||
grep -rl "ClassName\|old_param_name" DOCS_PATH/guides/
|
||||
```
|
||||
|
||||
For each guide that references changed code:
|
||||
1. Read the full guide
|
||||
2. Update class names, parameter names, import paths, and code examples that are now incorrect
|
||||
3. **Don't rewrite prose** — only fix the specific references that changed
|
||||
4. Leave guides alone if they reference the service generally but don't use any changed APIs
|
||||
|
||||
Guide directories:
|
||||
- `guides/learn/` — conceptual tutorials (pipeline, LLM, STT, TTS, etc.)
|
||||
- `guides/fundamentals/` — practical how-tos (metrics, recording, transcripts, etc.)
|
||||
- `guides/features/` — feature-specific guides (Gemini Live, OpenAI audio, WhatsApp, etc.)
|
||||
- `guides/telephony/` — telephony integration guides (Twilio, Plivo, Telnyx, etc.)
|
||||
|
||||
### Step 8: Identify doc gaps
|
||||
|
||||
After processing all mapped pairs, check for two kinds of gaps:
|
||||
|
||||
**Missing pages**: Source files that had no doc page mapping (neither tier 1, 2, nor 3) and are not marked as "(skip)". For each, tell the user:
|
||||
- The source file path
|
||||
- The main class(es) it defines
|
||||
- Whether a new doc page should be created
|
||||
|
||||
**Missing sections**: Mapped doc pages that are missing standard sections compared to the source. For example, a transport page with no Configuration section, or a service page with no InputParams table when the source defines `InputParams(BaseModel)`. Flag these and offer to add the missing sections.
|
||||
|
||||
If the user wants a new page, create it using this template structure:
|
||||
```
|
||||
---
|
||||
title: "Service Name"
|
||||
description: "Brief description"
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
[Description from class docstring or source analysis]
|
||||
|
||||
<CardGroup cols={2}>
|
||||
[Cards for API reference and examples if available]
|
||||
</CardGroup>
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
pip install "pipecat-ai[package-name]"
|
||||
```
|
||||
|
||||
## Prerequisites
|
||||
|
||||
[Environment variables and account setup]
|
||||
|
||||
## Configuration
|
||||
|
||||
[ParamField entries for constructor params]
|
||||
|
||||
## InputParams
|
||||
|
||||
[Table of InputParams fields, if the service has them]
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic Setup
|
||||
|
||||
```python
|
||||
[Minimal working example]
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
[Important caveats]
|
||||
|
||||
## Event Handlers
|
||||
|
||||
[Event table and example code]
|
||||
```
|
||||
|
||||
### Step 9: Output summary
|
||||
|
||||
After all edits are complete, print a summary:
|
||||
|
||||
```
|
||||
## Documentation Updates
|
||||
|
||||
### Updated reference pages
|
||||
- `server/services/stt/deepgram.mdx` — Updated Configuration (added `new_param`), InputParams (updated `language` default)
|
||||
- `server/services/tts/elevenlabs.mdx` — Updated Event Handlers (added `on_connected`)
|
||||
|
||||
### Updated guides
|
||||
- `guides/learn/speech-to-text.mdx` — Updated code example (renamed `old_param` → `new_param`)
|
||||
|
||||
### Unmapped source files
|
||||
- `src/pipecat/services/newprovider/tts.py` — NewProviderTTSService (no doc page exists)
|
||||
|
||||
### Skipped files
|
||||
- `src/pipecat/services/ai_service.py` — internal base class
|
||||
```
|
||||
|
||||
## Guidelines
|
||||
|
||||
- **Be conservative** — only change what the diff warrants. Don't "improve" docs beyond what changed in source.
|
||||
- **Read before editing** — always read the full doc page before making changes so you understand the existing structure.
|
||||
- **Preserve voice** — match the writing style of the existing doc page, don't impose a different tone.
|
||||
- **One PR at a time** — this skill operates on the current branch's diff against main. Don't look at other branches.
|
||||
- **Parallel analysis** — when multiple source files map to different doc pages, analyze and edit them in parallel for efficiency.
|
||||
- **Shared source files** — files like `services/google/google.py` are shared bases. Check which services import from them and update all affected doc pages.
|
||||
|
||||
## Checklist
|
||||
|
||||
Before finishing, verify:
|
||||
|
||||
- [ ] All changed source files were checked against the mapping table
|
||||
- [ ] Each doc page edit matches the actual source code change (not guessed)
|
||||
- [ ] No content was removed unless the corresponding source was removed
|
||||
- [ ] New parameters have accurate types and defaults from source
|
||||
- [ ] Formatting matches the existing page style
|
||||
- [ ] Guides referencing changed APIs were checked and updated
|
||||
- [ ] Unmapped files were reported to the user
|
||||
79
.claude/skills/update-docs/SOURCE_DOC_MAPPING.md
Normal file
79
.claude/skills/update-docs/SOURCE_DOC_MAPPING.md
Normal file
@@ -0,0 +1,79 @@
|
||||
# Source-to-Doc Mapping
|
||||
|
||||
Maps pipecat source files to their documentation pages. Source paths are relative to `src/pipecat/`. Doc paths are relative to `DOCS_PATH`.
|
||||
|
||||
## Name mismatches
|
||||
|
||||
These source paths don't follow the standard `services/{provider}/{type}.py` → `server/services/{type}/{provider}.mdx` pattern.
|
||||
|
||||
| Source path | Doc page |
|
||||
|---|---|
|
||||
| `services/google/llm.py` | `server/services/llm/gemini.mdx` |
|
||||
| `services/google/llm_vertex.py` | `server/services/llm/google-vertex.mdx` |
|
||||
| `services/google/google.py` | (shared base — check which services use it) |
|
||||
| `services/google/gemini_live/**` | `server/services/s2s/gemini-live.mdx` |
|
||||
| `services/google/gemini_live/llm_vertex.py` | `server/services/s2s/gemini-live-vertex.mdx` |
|
||||
| `services/aws_nova_sonic/**` | `server/services/s2s/aws.mdx` |
|
||||
| `services/ultravox/**` | `server/services/s2s/ultravox.mdx` |
|
||||
| `services/grok/realtime/**` | `server/services/s2s/grok.mdx` |
|
||||
| `services/openai/realtime/**` | `server/services/s2s/openai.mdx` |
|
||||
| `processors/frameworks/rtvi.py` | `server/frameworks/rtvi/rtvi-processor.mdx` and `server/frameworks/rtvi/rtvi-observer.mdx` |
|
||||
| `processors/transcript_processor.py` | `server/utilities/transcript-processor.mdx` |
|
||||
| `processors/user_idle_processor.py` | `server/utilities/user-idle-processor.mdx` |
|
||||
| `processors/idle_frame_processor.py` | `server/pipeline/pipeline-idle-detection.mdx` |
|
||||
| `pipeline/task.py` | `server/pipeline/pipeline-task.mdx` |
|
||||
| `pipeline/runner.py` | `server/utilities/runner/guide.mdx` |
|
||||
| `transports/base_transport.py` | `server/services/transport/transport-params.mdx` |
|
||||
|
||||
## Skip list
|
||||
|
||||
These files should never trigger doc updates.
|
||||
|
||||
| Pattern | Reason |
|
||||
|---|---|
|
||||
| `services/ai_service.py` | Internal base class |
|
||||
| `services/stt_service.py` | Internal base class |
|
||||
| `services/tts_service.py` | Internal base class |
|
||||
| `services/llm_service.py` | Internal base class |
|
||||
| `services/websocket_service.py` | Internal base class |
|
||||
| `services/openai_realtime_beta/**` | Deprecated |
|
||||
| `services/openai_realtime/**` | Deprecated |
|
||||
| `services/gemini_multimodal_live/**` | Deprecated |
|
||||
| `services/aws/agent_core.py` | Internal |
|
||||
| `services/aws/sagemaker/**` | No doc page |
|
||||
| `transports/base_input.py` | Internal base class |
|
||||
| `transports/base_output.py` | Internal base class |
|
||||
| `transports/websocket/client.py` | No doc page |
|
||||
| `serializers/base_serializer.py` | Internal base class |
|
||||
| `serializers/protobuf.py` | Internal |
|
||||
| `processors/audio/**` | Internal |
|
||||
| `pipeline/pipeline.py` | Core architecture, not a service doc |
|
||||
|
||||
## Pattern matching
|
||||
|
||||
For files not in the tables above, apply these patterns. Convert underscores to hyphens in provider names for doc filenames.
|
||||
|
||||
| Source pattern | Doc pattern |
|
||||
|---|---|
|
||||
| `services/{provider}/stt*.py` | `server/services/stt/{provider}.mdx` |
|
||||
| `services/{provider}/tts*.py` | `server/services/tts/{provider}.mdx` |
|
||||
| `services/{provider}/llm*.py` | `server/services/llm/{provider}.mdx` |
|
||||
| `services/{provider}/image*.py` | `server/services/image-generation/{provider}.mdx` |
|
||||
| `services/{provider}/video*.py` | `server/services/video/{provider}.mdx` |
|
||||
| `services/{provider}/realtime/**` | `server/services/s2s/{provider}.mdx` |
|
||||
| `transports/{name}/**` | `server/services/transport/{name}.mdx` |
|
||||
| `serializers/{name}.py` | `server/services/serializers/{name}.mdx` |
|
||||
| `observers/**` | `server/utilities/observers/` (match by class name) |
|
||||
| `audio/vad/**` | `server/utilities/audio/` (match by class name) |
|
||||
| `audio/filters/**` | `server/utilities/audio/` (match by class name) |
|
||||
| `audio/mixers/**` | `server/utilities/audio/` (match by class name) |
|
||||
| `processors/filters/**` | `server/utilities/filters/` (match by class name) |
|
||||
|
||||
If the doc file doesn't exist at the resolved path, the file is **unmapped**.
|
||||
|
||||
## Search fallback
|
||||
|
||||
For files that don't match any table or pattern above:
|
||||
1. Extract the main class name(s) from the source file
|
||||
2. Search the docs directory for that class name: `grep -r "ClassName" DOCS_PATH/server/`
|
||||
3. If found in a doc page, use that as the mapping
|
||||
12
.github/workflows/coverage.yaml
vendored
12
.github/workflows/coverage.yaml
vendored
@@ -29,11 +29,21 @@ jobs:
|
||||
|
||||
- name: Install system packages
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y portaudio19-dev
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
uv sync --group dev --extra anthropic --extra aws --extra google --extra langchain --extra websocket
|
||||
uv sync --group dev \
|
||||
--extra anthropic \
|
||||
--extra aws \
|
||||
--extra google \
|
||||
--extra langchain \
|
||||
--extra livekit \
|
||||
--extra local-smart-turn-v3 \
|
||||
--extra piper \
|
||||
--extra tracing \
|
||||
--extra websocket
|
||||
|
||||
- name: Run tests with coverage
|
||||
run: |
|
||||
|
||||
2
.github/workflows/generate-changelog.yml
vendored
2
.github/workflows/generate-changelog.yml
vendored
@@ -86,7 +86,7 @@ jobs:
|
||||
fi
|
||||
|
||||
# Validate fragment types
|
||||
VALID_TYPES="added changed deprecated removed fixed security other"
|
||||
VALID_TYPES="added changed deprecated removed fixed performance security other"
|
||||
INVALID_FRAGMENTS=""
|
||||
|
||||
for file in changelog/*.md; do
|
||||
|
||||
14
.github/workflows/python-compatibility.yaml
vendored
14
.github/workflows/python-compatibility.yaml
vendored
@@ -14,7 +14,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ['3.10.18', '3.11.13', '3.12.11', '3.13.5']
|
||||
python-version: ['3.10.19', '3.11.14', '3.12.12', '3.13.12']
|
||||
|
||||
name: Python ${{ matrix.python-version }}
|
||||
steps:
|
||||
@@ -40,20 +40,10 @@ jobs:
|
||||
uv python install ${{ matrix.python-version }}
|
||||
uv python pin ${{ matrix.python-version }}
|
||||
|
||||
- name: Test uv sync with all extras (Python < 3.13)
|
||||
if: "!startsWith(matrix.python-version, '3.13.')"
|
||||
- name: Test uv sync with all extras
|
||||
run: |
|
||||
uv sync --group dev --all-extras --no-extra krisp
|
||||
|
||||
- name: Test uv sync without PyTorch extras (Python 3.13+)
|
||||
if: startsWith(matrix.python-version, '3.13.')
|
||||
run: |
|
||||
uv sync --group dev --all-extras \
|
||||
--no-extra krisp \
|
||||
--no-extra local-smart-turn \
|
||||
--no-extra moondream \
|
||||
--no-extra mlx-whisper
|
||||
|
||||
- name: Verify installation
|
||||
run: |
|
||||
uv run python --version
|
||||
|
||||
12
.github/workflows/tests.yaml
vendored
12
.github/workflows/tests.yaml
vendored
@@ -33,11 +33,21 @@ jobs:
|
||||
|
||||
- name: Install system packages
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y portaudio19-dev
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
uv sync --group dev --extra anthropic --extra aws --extra google --extra langchain --extra websocket
|
||||
uv sync --group dev \
|
||||
--extra anthropic \
|
||||
--extra aws \
|
||||
--extra google \
|
||||
--extra langchain \
|
||||
--extra livekit \
|
||||
--extra local-smart-turn-v3 \
|
||||
--extra piper \
|
||||
--extra tracing \
|
||||
--extra websocket
|
||||
|
||||
- name: Test with pytest
|
||||
run: |
|
||||
|
||||
11
.gitignore
vendored
11
.gitignore
vendored
@@ -4,7 +4,14 @@ __pycache__/
|
||||
*~
|
||||
venv
|
||||
.venv
|
||||
/.idea
|
||||
.idea
|
||||
.gradle
|
||||
.next
|
||||
next-env.d.ts
|
||||
local.properties
|
||||
*.log
|
||||
*.lock
|
||||
smart_turn_audio_log
|
||||
#*#
|
||||
|
||||
# Distribution / Packaging
|
||||
@@ -27,7 +34,7 @@ share/python-wheels/
|
||||
*.egg
|
||||
MANIFEST
|
||||
.DS_Store
|
||||
.env
|
||||
.env*
|
||||
fly.toml
|
||||
|
||||
# Examples
|
||||
|
||||
931
CHANGELOG.md
931
CHANGELOG.md
@@ -7,6 +7,937 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
<!-- towncrier release notes start -->
|
||||
|
||||
## [0.0.103] - 2026-02-20
|
||||
|
||||
### Added
|
||||
|
||||
- Added `"timestampTransportStrategy": "ASYNC"` to `InworldAITTSService`. This
|
||||
allows timestamps info to trail audio chunks arrival, resulting in much
|
||||
better first audio chunk latency
|
||||
(PR [#3625](https://github.com/pipecat-ai/pipecat/pull/3625))
|
||||
|
||||
- Added model-specific `InputParams` to `RimeTTSService`: arcana params
|
||||
(`repetition_penalty`, `temperature`, `top_p`) and mistv2 params
|
||||
(`no_text_normalization`, `save_oovs`, `segment`). Model, voice, and param
|
||||
changes now trigger WebSocket reconnection.
|
||||
(PR [#3642](https://github.com/pipecat-ai/pipecat/pull/3642))
|
||||
|
||||
- Added `write_transport_frame()` hook to `BaseOutputTransport` allowing
|
||||
transport subclasses to handle custom frame types that flow through the audio
|
||||
queue.
|
||||
(PR [#3719](https://github.com/pipecat-ai/pipecat/pull/3719))
|
||||
|
||||
- Added `DailySIPTransferFrame` and `DailySIPReferFrame` to the Daily
|
||||
transport. These frames queue SIP transfer and SIP REFER operations with
|
||||
audio, so the operation executes only after the bot finishes its current
|
||||
utterance.
|
||||
(PR [#3719](https://github.com/pipecat-ai/pipecat/pull/3719))
|
||||
|
||||
- Added keepalive support to `SarvamSTTService` to prevent idle connection
|
||||
timeouts (e.g. when used behind a `ServiceSwitcher`).
|
||||
(PR [#3730](https://github.com/pipecat-ai/pipecat/pull/3730))
|
||||
|
||||
- Added `UserIdleTimeoutUpdateFrame` to enable or disable user idle detection
|
||||
at runtime by updating the timeout dynamically.
|
||||
(PR [#3748](https://github.com/pipecat-ai/pipecat/pull/3748))
|
||||
|
||||
- Added `broadcast_sibling_id` field to the base `Frame` class. This field is
|
||||
automatically set by `broadcast_frame()` and `broadcast_frame_instance()` to
|
||||
the ID of the paired frame pushed in the opposite direction, allowing
|
||||
receivers to identify broadcast pairs.
|
||||
(PR [#3774](https://github.com/pipecat-ai/pipecat/pull/3774))
|
||||
|
||||
- Added `ignored_sources` parameter to `RTVIObserverParams` and
|
||||
`add_ignored_source()`/`remove_ignored_source()` methods to `RTVIObserver` to
|
||||
suppress RTVI messages from specific pipeline processors (e.g. a silent
|
||||
evaluation LLM).
|
||||
(PR [#3779](https://github.com/pipecat-ai/pipecat/pull/3779))
|
||||
|
||||
- Added `DeepgramSageMakerTTSService` for running Deepgram TTS models deployed
|
||||
on AWS SageMaker endpoints via HTTP/2 bidirectional streaming. Supports the
|
||||
Deepgram TTS protocol (Speak, Flush, Clear, Close), interruption handling,
|
||||
and per-turn TTFB metrics.
|
||||
(PR [#3785](https://github.com/pipecat-ai/pipecat/pull/3785))
|
||||
|
||||
### Changed
|
||||
|
||||
- ⚠️ `RimeTTSService` now defaults to `model="arcana"` and the
|
||||
`wss://users-ws.rime.ai/ws3` endpoint. `InputParams` defaults changed from
|
||||
mistv2-specific values to `None` — only explicitly-set params are sent as
|
||||
query params.
|
||||
(PR [#3642](https://github.com/pipecat-ai/pipecat/pull/3642))
|
||||
|
||||
- `AICFilter` now shares read-only AIC models via a singleton `AICModelManager`
|
||||
in `aic_filter.py`.
|
||||
- Multiple filters using the same model path or `(model_id,
|
||||
model_download_dir)` share one loaded model, with reference counting and
|
||||
concurrent load deduplication.
|
||||
- Model file I/O runs off the event loop so the filter does not block.
|
||||
(PR [#3684](https://github.com/pipecat-ai/pipecat/pull/3684))
|
||||
|
||||
- Added `X-User-Agent` and `X-Request-Id` headers to `InworldTTSService` for
|
||||
better traceability.
|
||||
(PR [#3706](https://github.com/pipecat-ai/pipecat/pull/3706))
|
||||
|
||||
- `DailyUpdateRemoteParticipantsFrame` is no longer deprecated and is now
|
||||
queued with audio like other transport frames.
|
||||
(PR [#3719](https://github.com/pipecat-ai/pipecat/pull/3719))
|
||||
|
||||
- Bumped Pillow dependency upper bound from `<12` to `<13` to allow Pillow
|
||||
12.x.
|
||||
(PR [#3728](https://github.com/pipecat-ai/pipecat/pull/3728))
|
||||
|
||||
- Moved STT keepalive mechanism from `WebsocketSTTService` to the `STTService`
|
||||
base class, allowing any STT service (not just websocket-based ones) to use
|
||||
idle-connection keepalive via the `keepalive_timeout` and
|
||||
`keepalive_interval` parameters.
|
||||
(PR [#3730](https://github.com/pipecat-ai/pipecat/pull/3730))
|
||||
|
||||
- Improved audio context management in `AudioContextTTSService` by moving
|
||||
context ID tracking to the base class and adding
|
||||
`reuse_context_id_within_turn` parameter to control concurrent TTS request
|
||||
handling.
|
||||
- Added helper methods: `has_active_audio_context()`,
|
||||
`get_active_audio_context_id()`, `remove_active_audio_context()`,
|
||||
`reset_active_audio_context()`
|
||||
- Simplified Cartesia, ElevenLabs, Inworld, Rime, AsyncAI, and Gradium TTS
|
||||
implementations by removing duplicate context management code
|
||||
(PR [#3732](https://github.com/pipecat-ai/pipecat/pull/3732))
|
||||
|
||||
- `UserIdleController` is now always created with a default timeout of 0
|
||||
(disabled). The `user_idle_timeout` parameter changed from `Optional[float] =
|
||||
None` to `float = 0` in `UserTurnProcessor`, `LLMUserAggregatorParams`, and
|
||||
`UserIdleController`.
|
||||
(PR [#3748](https://github.com/pipecat-ai/pipecat/pull/3748))
|
||||
|
||||
- Change the version specifier from `>=0.2.8` to `~=0.2.8` for the
|
||||
`speechmatics-voice` package to ensure compatibility with future patch
|
||||
versions.
|
||||
(PR [#3761](https://github.com/pipecat-ai/pipecat/pull/3761))
|
||||
|
||||
- Updated `InworldTTSService` and `InworldHttpTTSService` to use `ASYNC`
|
||||
timestamp transport strategy by default
|
||||
(PR [#3765](https://github.com/pipecat-ai/pipecat/pull/3765))
|
||||
|
||||
- Added `start_time` and `end_time` parameters to `start_ttfb_metrics()`,
|
||||
`stop_ttfb_metrics()`, `start_processing_metrics()`, and
|
||||
`stop_processing_metrics()` in `FrameProcessor` and `FrameProcessorMetrics`,
|
||||
allowing custom timestamps for metrics measurement. `STTService` now uses
|
||||
these instead of custom TTFB tracking.
|
||||
(PR [#3776](https://github.com/pipecat-ai/pipecat/pull/3776))
|
||||
|
||||
- Updated default Anthropic model from `claude-sonnet-4-5-20250929` to
|
||||
`claude-sonnet-4-6`.
|
||||
(PR [#3792](https://github.com/pipecat-ai/pipecat/pull/3792))
|
||||
|
||||
### Deprecated
|
||||
|
||||
- Deprecated unused `Traceable`, `@traceable`, `@traced`, and
|
||||
`AttachmentStrategy` in `pipecat.utils.tracing.class_decorators`. This module
|
||||
will be removed in a future release.
|
||||
(PR [#3733](https://github.com/pipecat-ai/pipecat/pull/3733))
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed race condition where `RTVIObserver` could send messages before
|
||||
`DailyTransport` join completed. Outbound messages are now queued & delivered
|
||||
after the transport is ready.
|
||||
(PR [#3615](https://github.com/pipecat-ai/pipecat/pull/3615))
|
||||
|
||||
- Fixed async generator cleanup in OpenAI LLM streaming to prevent
|
||||
`AttributeError` with uvloop on Python 3.12+ (MagicStack/uvloop#699).
|
||||
(PR [#3698](https://github.com/pipecat-ai/pipecat/pull/3698))
|
||||
|
||||
- Fixed `SmallWebRTCTransport` input audio resampling to properly handle all
|
||||
sample rates, including 8kHz audio.
|
||||
(PR [#3713](https://github.com/pipecat-ai/pipecat/pull/3713))
|
||||
|
||||
- Fixed a race condition in `RTVIObserver` where bot output messages could be
|
||||
sent before the bot-started-speaking event.
|
||||
(PR [#3718](https://github.com/pipecat-ai/pipecat/pull/3718))
|
||||
|
||||
- Fixed Grok Realtime `session.updated` event parsing failure caused by the API
|
||||
returning prefixed voice names (e.g. `"human_Ara"` instead of `"Ara"`).
|
||||
(PR [#3720](https://github.com/pipecat-ai/pipecat/pull/3720))
|
||||
|
||||
- Fixed context ID reuse issue in `ElevenLabsTTSService`, `InworldTTSService`,
|
||||
`RimeTTSService`, `CartesiaTTSService`, `AsyncAITTSService`, and
|
||||
`PlayHTTTSService`. Services now properly reuse the same context ID across
|
||||
multiple `run_tts()` invocations within a single LLM turn, preventing context
|
||||
tracking issues and incorrect lifecycle signaling.
|
||||
(PR [#3729](https://github.com/pipecat-ai/pipecat/pull/3729))
|
||||
|
||||
- Fixed word timestamp interleaving issue in `ElevenLabsTTSService` when
|
||||
processing multiple sentences within a single LLM turn.
|
||||
(PR [#3729](https://github.com/pipecat-ai/pipecat/pull/3729))
|
||||
|
||||
- Fixed tracing service decorators executing the wrapped function twice when
|
||||
the function itself raised an exception (e.g., LLM rate limit, TTS timeout).
|
||||
(PR [#3735](https://github.com/pipecat-ai/pipecat/pull/3735))
|
||||
|
||||
- Fixed `LLMUserAggregator` broadcasting mute events before `StartFrame`
|
||||
reaches downstream processors.
|
||||
(PR [#3737](https://github.com/pipecat-ai/pipecat/pull/3737))
|
||||
|
||||
- Fixed `UserIdleController` false idle triggers caused by gaps between user
|
||||
and bot activity frames. The idle timer now starts only after
|
||||
`BotStoppedSpeakingFrame` and is suppressed during active user turns and
|
||||
function calls.
|
||||
(PR [#3744](https://github.com/pipecat-ai/pipecat/pull/3744))
|
||||
|
||||
- Fixed incorrect `sample_rate` assignment in
|
||||
`TavusInputTransport._on_participant_audio_data` (was using
|
||||
`audio.audio_frames` instead of `audio.sample_rate`).
|
||||
(PR [#3768](https://github.com/pipecat-ai/pipecat/pull/3768))
|
||||
|
||||
- Fixed `RTVIObserver` not processing upstream-only frames. Previously, all
|
||||
upstream frames were filtered out to avoid duplicate messages from
|
||||
broadcasted frames. Now only upstream copies of broadcasted frames are
|
||||
skipped.
|
||||
(PR [#3774](https://github.com/pipecat-ai/pipecat/pull/3774))
|
||||
|
||||
- Fixed mutable default arguments in `LLMContextAggregatorPair.__init__()` that
|
||||
could cause shared state across instances.
|
||||
(PR [#3782](https://github.com/pipecat-ai/pipecat/pull/3782))
|
||||
|
||||
- Fixed `DeepgramSageMakerSTTService` to properly track finalize lifecycle
|
||||
using `request_finalize()` / `confirm_finalize()` and use `is_final` (instead
|
||||
of `is_final and speech_final`) for final transcription detection, matching
|
||||
`DeepgramSTTService` behavior.
|
||||
(PR [#3784](https://github.com/pipecat-ai/pipecat/pull/3784))
|
||||
|
||||
- Fixed a race condition in `AudioContextTTSService` where the audio context
|
||||
could time out between consecutive TTS requests within the same turn, causing
|
||||
audio to be discarded.
|
||||
(PR [#3787](https://github.com/pipecat-ai/pipecat/pull/3787))
|
||||
|
||||
- Fixed `push_interruption_task_frame_and_wait()` hanging indefinitely when the
|
||||
`InterruptionFrame` does not reach the pipeline sink within the timeout.
|
||||
Added a `timeout` keyword argument to customize the wait duration.
|
||||
(PR [#3789](https://github.com/pipecat-ai/pipecat/pull/3789))
|
||||
|
||||
## [0.0.102] - 2026-02-10
|
||||
|
||||
### Added
|
||||
|
||||
- Added `ResembleAITTSService` for text-to-speech using Resemble AI's streaming
|
||||
WebSocket API with word-level timestamps and jitter buffering for smooth
|
||||
audio playback.
|
||||
(PR [#3134](https://github.com/pipecat-ai/pipecat/pull/3134))
|
||||
|
||||
- Added `UserBotLatencyObserver` for tracking user-to-bot response latency.
|
||||
When tracing is enabled, latency measurements are automatically recorded as
|
||||
`turn.user_bot_latency_seconds` attributes on OpenTelemetry turn spans.
|
||||
(PR [#3355](https://github.com/pipecat-ai/pipecat/pull/3355))
|
||||
|
||||
- Added `append_to_context` parameter to `TTSSpeakFrame` for conditional LLM
|
||||
context addition.
|
||||
- Allows fine-grained control over whether text should be added to
|
||||
conversation context
|
||||
- Defaults to `True` to maintain backward compatibility
|
||||
(PR [#3584](https://github.com/pipecat-ai/pipecat/pull/3584))
|
||||
|
||||
- Added TTS context tracking system with `context_id` field to trace audio
|
||||
generation through the pipeline.
|
||||
- `TTSAudioRawFrame`, `TTSStartedFrame`, `TTSStoppedFrame` now include
|
||||
`context_id`
|
||||
- `AggregatedTextFrame` and `TTSTextFrame` now include `context_id`
|
||||
- Enables tracking which TTS request generated specific audio chunks
|
||||
(PR [#3584](https://github.com/pipecat-ai/pipecat/pull/3584))
|
||||
|
||||
- Added support for Inworld TTS Websocket Auto Mode for improved latency
|
||||
(PR [#3593](https://github.com/pipecat-ai/pipecat/pull/3593))
|
||||
|
||||
- Added new frames for context summarization: `LLMContextSummaryRequestFrame`
|
||||
and `LLMContextSummaryResultFrame`.
|
||||
(PR [#3621](https://github.com/pipecat-ai/pipecat/pull/3621))
|
||||
|
||||
- Added context summarization feature to automatically compress conversation
|
||||
history when conversation length limits (by token or message count) are
|
||||
reached, enabling efficient long-running conversations.
|
||||
- Configure via `enable_context_summarization=True` in
|
||||
`LLMAssistantAggregatorParams`
|
||||
- Customize behavior with `LLMContextSummarizationConfig` (max tokens,
|
||||
thresholds, etc.)
|
||||
- Automatically preserves incomplete function call sequences during
|
||||
summarization
|
||||
- See new examples:
|
||||
`examples/foundational/54-context-summarization-openai.py` and
|
||||
`examples/foundational/54a-context-summarization-google.py`
|
||||
(PR [#3621](https://github.com/pipecat-ai/pipecat/pull/3621))
|
||||
|
||||
- Added RTVI function call lifecycle events (`llm-function-call-started`,
|
||||
`llm-function-call-in-progress`, `llm-function-call-stopped`) with
|
||||
configurable security levels via
|
||||
`RTVIObserverParams.function_call_report_level`. Supports per-function
|
||||
control over what information is exposed (`DISABLED`, `NONE`, `NAME`, or
|
||||
`FULL`).
|
||||
(PR [#3630](https://github.com/pipecat-ai/pipecat/pull/3630))
|
||||
|
||||
- Added `RequestMetadataFrame` and metadata handling for `ServiceSwitcher` to
|
||||
ensure STT services correctly emit `STTMetadataFrame` when switching between
|
||||
services. Only the active service's metadata is propagated downstream,
|
||||
switching services triggers the newly active service to re-emit its metadata,
|
||||
and proper frame ordering is maintained at startup.
|
||||
(PR [#3637](https://github.com/pipecat-ai/pipecat/pull/3637))
|
||||
|
||||
- Added `STTMetadataFrame` to broadcast STT service latency information at
|
||||
pipeline start.
|
||||
- STT services broadcast P99 time-to-final-segment (`ttfs_p99_latency`) to
|
||||
downstream processors
|
||||
- Turn stop strategies automatically configure their STT timeout from this
|
||||
metadata
|
||||
- Developers can override `ttfs_p99_latency` via constructor argument for
|
||||
custom deployments
|
||||
- Added measured P99 values for STT providers.
|
||||
- See [stt-benchmark](https://github.com/pipecat-ai/stt-benchmark) to
|
||||
measure latency for your configuration
|
||||
(PR [#3637](https://github.com/pipecat-ai/pipecat/pull/3637))
|
||||
|
||||
- Added support for `is_sandbox` parameter in `LiveAvatarNewSessionRequest` to
|
||||
enable sandbox mode for HeyGen LiveAvatar sessions.
|
||||
(PR [#3653](https://github.com/pipecat-ai/pipecat/pull/3653))
|
||||
|
||||
- Added support for `video_settings` parameter in `LiveAvatarNewSessionRequest`
|
||||
to configure video encoding (H264/VP8) and quality levels.
|
||||
(PR [#3653](https://github.com/pipecat-ai/pipecat/pull/3653))
|
||||
|
||||
- Added `OpenAIRealtimeSTTService` for real-time streaming speech-to-text using
|
||||
OpenAI's Realtime API WebSocket transcription sessions. Supports local VAD
|
||||
and server-side VAD modes, noise reduction, and automatic reconnection.
|
||||
(PR [#3656](https://github.com/pipecat-ai/pipecat/pull/3656))
|
||||
|
||||
- Added `bulbul:v3-beta` TTS model support for Sarvam AI with temperature
|
||||
control and 25 new speaker voices.
|
||||
(PR [#3671](https://github.com/pipecat-ai/pipecat/pull/3671))
|
||||
|
||||
- Added `saaras:v3` STT model support for Sarvam AI with new `mode` parameter
|
||||
(transcribe, translate, verbatim, translit, codemix) and prompt support.
|
||||
(PR [#3671](https://github.com/pipecat-ai/pipecat/pull/3671))
|
||||
|
||||
- Added new OpenAI TTS voice options `marin` and `cedar`.
|
||||
(PR [#3682](https://github.com/pipecat-ai/pipecat/pull/3682))
|
||||
|
||||
- Added `UserMuteStartedFrame` and `UserMuteStoppedFrame` system frames, and
|
||||
corresponding `user-mute-started` / `user-mute-stopped` RTVI messages, so
|
||||
clients can observe when mute strategies activate or deactivate.
|
||||
(PR [#3687](https://github.com/pipecat-ai/pipecat/pull/3687))
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated all 30+ TTS service implementations to support context tracking with
|
||||
`context_id`.
|
||||
- Services now generate and propagate context IDs through TTS frames
|
||||
- Enables end-to-end tracing of TTS requests through the pipeline
|
||||
(PR [#3584](https://github.com/pipecat-ai/pipecat/pull/3584))
|
||||
|
||||
- ⚠️ `TTSService.run_tts()` now requires a `context_id` parameter for context
|
||||
tracking.
|
||||
- Custom TTS service implementations must update their `run_tts()`
|
||||
signature
|
||||
- Before: `async def run_tts(self, text: str) -> AsyncGenerator[Frame,
|
||||
None]:`
|
||||
- After: `async def run_tts(self, text: str, context_id: str) ->
|
||||
AsyncGenerator[Frame, None]:`
|
||||
(PR [#3584](https://github.com/pipecat-ai/pipecat/pull/3584))
|
||||
|
||||
- Simplified context aggregators to use `frame.append_to_context` flag instead
|
||||
of tracking internal state.
|
||||
- Cleaner logic in `LLMResponseAggregator` and
|
||||
`LLMResponseUniversalAggregator`
|
||||
- More consistent behavior across aggregator implementations
|
||||
(PR [#3584](https://github.com/pipecat-ai/pipecat/pull/3584))
|
||||
|
||||
- Updated timestamps to be cumulative within an agent turn, using
|
||||
flushCompleted message as an indication of when timestamps from the server
|
||||
are reset to 0
|
||||
(PR [#3593](https://github.com/pipecat-ai/pipecat/pull/3593))
|
||||
|
||||
- Changed `KokoroTTSService` to use `kokoro-onnx` instead of `kokoro` as the
|
||||
underlying TTS engine.
|
||||
(PR [#3612](https://github.com/pipecat-ai/pipecat/pull/3612))
|
||||
|
||||
- Improved user turn stop timing in `TranscriptionUserTurnStopStrategy` and
|
||||
`TurnAnalyzerUserTurnStopStrategy`.
|
||||
- Timeout now starts on `VADUserStoppedSpeakingFrame` for tighter, more
|
||||
predictable timing
|
||||
- Added support for finalized transcripts
|
||||
(`TranscriptionFrame.finalized=True`) to trigger earlier
|
||||
- Added fallback timeout for edge cases where transcripts arrive without
|
||||
VAD events
|
||||
- Removed `InterimTranscriptionFrame` handling (no longer affects timing)
|
||||
(PR [#3637](https://github.com/pipecat-ai/pipecat/pull/3637))
|
||||
|
||||
- Improved the accuracy of the `UserBotLatencyObserver` and
|
||||
`UserBotLatencyLogObserver` by measuring from the time when the user actually
|
||||
starts speaking.
|
||||
(PR [#3637](https://github.com/pipecat-ai/pipecat/pull/3637))
|
||||
|
||||
- ⚠️ Renamed `timeout` parameter to `user_speech_timeout` in
|
||||
`TranscriptionUserTurnStopStrategy`.
|
||||
(PR [#3637](https://github.com/pipecat-ai/pipecat/pull/3637))
|
||||
|
||||
- Updated the `VADUserStartedSpeakingFrame` to include `start_secs` and
|
||||
`timestamp` and `VADUserStoppedSpeakingFrame` to include `stop_secs` and
|
||||
`timestamp`, removing the need to separately handle the
|
||||
`SpeechControlParamsFrame` for VADParams values.
|
||||
(PR [#3637](https://github.com/pipecat-ai/pipecat/pull/3637))
|
||||
|
||||
- ⚠️ Renamed `TranscriptionUserTurnStopStrategy` to
|
||||
`SpeechTimeoutUserTurnStopStrategy`. The old name is deprecated and will be
|
||||
removed in a future release.
|
||||
(PR [#3637](https://github.com/pipecat-ai/pipecat/pull/3637))
|
||||
|
||||
- `AssemblyAISTTService` now automatically configures optimal settings for
|
||||
manual turn detection when `vad_force_turn_endpoint=True`. This sets
|
||||
`end_of_turn_confidence_threshold=1.0` and `max_turn_silence=2000` by
|
||||
default, which disables model-based turn detection and reduces latency by
|
||||
relying on external VAD for turn endpoints. Warnings are logged if
|
||||
conflicting settings are detected.
|
||||
(PR [#3644](https://github.com/pipecat-ai/pipecat/pull/3644))
|
||||
|
||||
- Upgraded the `pipecat-ai-small-webrtc-prebuilt` package to v2.1.0.
|
||||
(PR [#3652](https://github.com/pipecat-ai/pipecat/pull/3652))
|
||||
|
||||
- Changed default session mode from "CUSTOM" to "LITE" in HeyGen LiveAvatar
|
||||
integration, with VP8 as the default video encoding.
|
||||
(PR [#3653](https://github.com/pipecat-ai/pipecat/pull/3653))
|
||||
|
||||
- ⚠️ The default `VADParams` `stop_secs` default is changing from `0.8` seconds
|
||||
to `0.2` seconds. This change both simplifies the developer experience and
|
||||
improves the performance of STT services. With a shorter `stop_secs` value,
|
||||
STT services using a local VAD can finalize sooner, resulting in faster
|
||||
transcription.
|
||||
- `SpeechTimeoutUserTurnStopStrategy`: control how long to wait for
|
||||
additional user speech using `user_speech_timeout` (default: 0.6 sec).
|
||||
- `TurnAnalyzerUserTurnStopStrategy`: the turn analyzer automatically
|
||||
adjusts the user wait time based on the audio input.
|
||||
(PR [#3659](https://github.com/pipecat-ai/pipecat/pull/3659))
|
||||
|
||||
- Moved interruption wait event from per-processor instance state to
|
||||
`InterruptionFrame` itself. Added `InterruptionFrame.complete()` to signal
|
||||
when the interruption has fully traversed the pipeline. Custom processors
|
||||
that block or consume an `InterruptionFrame` before it reaches the pipeline
|
||||
sink must call `frame.complete()` to avoid stalling
|
||||
`push_interruption_task_frame_and_wait()`. A warning is logged if completion
|
||||
does not happen within 2 seconds.
|
||||
(PR [#3660](https://github.com/pipecat-ai/pipecat/pull/3660))
|
||||
|
||||
- Update the default model to `scribe_v2` for `ElevenLabsSTTService`.
|
||||
(PR [#3664](https://github.com/pipecat-ai/pipecat/pull/3664))
|
||||
|
||||
- Changed the `DeepgramSTTService` default setting for `smart_format` to
|
||||
`False`, as agents don't need smart formatting. Disabling this setting
|
||||
provides a small performance improvement, as well.
|
||||
(PR [#3666](https://github.com/pipecat-ai/pipecat/pull/3666))
|
||||
|
||||
- Changed `FunctionCallCancelFrame` to broadcast in both directions for
|
||||
consistency with other function call frames.
|
||||
(PR [#3672](https://github.com/pipecat-ai/pipecat/pull/3672))
|
||||
|
||||
- Changed default user turn stop strategy from
|
||||
`TranscriptionUserTurnStopStrategy` to `TurnAnalyzerUserTurnStopStrategy`
|
||||
with `LocalSmartTurnAnalyzerV3`.
|
||||
(PR [#3689](https://github.com/pipecat-ai/pipecat/pull/3689))
|
||||
|
||||
- Renamed `RequestMetadataFrame` to `ServiceSwitcherRequestMetadataFrame` and
|
||||
added a `service` field to target a specific service. The frame is now pushed
|
||||
downstream by services after handling instead of being silently consumed.
|
||||
(PR [#3692](https://github.com/pipecat-ai/pipecat/pull/3692))
|
||||
|
||||
- Update `SonioxSTTService` to set `vad_force_turn_endpoint` to `True`. This
|
||||
setting disabled the turn detection logic available natively in Soniox.
|
||||
Instead, Soniox relies on a local VAD to finalize the transcript. This
|
||||
configuration meaningfully reduces the time to final segment for Soniox. With
|
||||
this setting enabled, Soniox outputs a transcript in ~250ms (median). Pipecat
|
||||
enables smart-turn detection by default using the `LocalSmartTurnAnalyzerV3`.
|
||||
To use the native turn detection logic in Soniox, just set
|
||||
`vad_force_turn_endpoint` to `False`.
|
||||
(PR [#3697](https://github.com/pipecat-ai/pipecat/pull/3697))
|
||||
|
||||
- Update `SonioxSTTService` default model to `stt-rt-v4`.
|
||||
(PR [#3697](https://github.com/pipecat-ai/pipecat/pull/3697))
|
||||
|
||||
- Updated the default model to `async_flash_v1.0` and base URL to
|
||||
`https://api.async.com` for `AsyncAITTSService`.
|
||||
(PR [#3701](https://github.com/pipecat-ai/pipecat/pull/3701))
|
||||
|
||||
### Deprecated
|
||||
|
||||
- Deprecated `UserBotLatencyLogObserver`. Use `UserBotLatencyObserver` directly
|
||||
with its `on_latency_measured` event handler instead.
|
||||
(PR [#3355](https://github.com/pipecat-ai/pipecat/pull/3355))
|
||||
|
||||
- Deprecated `RTVILLMFunctionCallMessage`, `RTVILLMFunctionCallMessageData`,
|
||||
and `RTVIProcessor.handle_function_call()`. Use the new
|
||||
`llm-function-call-in-progress` event sent automatically by `RTVIObserver`
|
||||
instead.
|
||||
(PR [#3630](https://github.com/pipecat-ai/pipecat/pull/3630))
|
||||
|
||||
### Removed
|
||||
|
||||
- ⚠️ Removed `timeout` parameter from `TurnAnalyzerUserTurnStopStrategy`. The
|
||||
timeout is now managed internally based on STT latency.
|
||||
(PR [#3637](https://github.com/pipecat-ai/pipecat/pull/3637))
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed pipeline freeze when `InterruptionFrame` discards `EndFrame` or
|
||||
`StopFrame` by making terminal frames uninterruptible.
|
||||
(PR [#3542](https://github.com/pipecat-ai/pipecat/pull/3542))
|
||||
|
||||
- Fixed OpenAI LLM stream not being closed on cancellation/exception, which
|
||||
could leak sockets.
|
||||
(PR [#3589](https://github.com/pipecat-ai/pipecat/pull/3589))
|
||||
|
||||
- Fixed `PipelineTask` adding duplicate `RTVIProcessor` and `RTVIObserver` when
|
||||
they were already provided in the pipeline or observers list. They are now
|
||||
detected and skipped, with appropriate warnings and errors logged for
|
||||
mismatched configurations.
|
||||
(PR [#3610](https://github.com/pipecat-ai/pipecat/pull/3610))
|
||||
|
||||
- Fixed function call timeout task not being cancelled when the handler
|
||||
completes without calling `result_callback` or is cancelled externally, which
|
||||
caused `RuntimeWarning: coroutine was never awaited`.
|
||||
(PR [#3616](https://github.com/pipecat-ai/pipecat/pull/3616))
|
||||
|
||||
- Fixed sentence splitting for Japanese, Chinese, Korean, and other non-Latin
|
||||
languages in TTS pipeline. NLTK's sentence tokenizer does not support CJK
|
||||
languages, causing text to accumulate until flush instead of being split at
|
||||
sentence boundaries. Added fallback detection for unambiguous non-Latin
|
||||
sentence-ending punctuation (e.g., `。`, `?`, `!`).
|
||||
(PR [#3617](https://github.com/pipecat-ai/pipecat/pull/3617))
|
||||
|
||||
- Fixed `PipelineTask` to also call `set_bot_ready()` when an external
|
||||
`RTVIProcessor` is provided.
|
||||
(PR [#3623](https://github.com/pipecat-ai/pipecat/pull/3623))
|
||||
|
||||
- Fixed `VADController` not broadcasting `SpeechControlParamsFrame` on startup,
|
||||
which prevented STT services from receiving VAD params needed for TTFB
|
||||
measurement.
|
||||
(PR [#3628](https://github.com/pipecat-ai/pipecat/pull/3628))
|
||||
|
||||
- Fixed `StopAsyncIteration` exceptions in `parse_telephony_websocket()` when
|
||||
WebSocket connections close before sending expected messages.
|
||||
(PR [#3629](https://github.com/pipecat-ai/pipecat/pull/3629))
|
||||
|
||||
- Fixed WebSocket transport error when broadcasting
|
||||
`InputTransportMessageFrame` by correctly instantiating the frame with its
|
||||
message parameter.
|
||||
(PR [#3635](https://github.com/pipecat-ai/pipecat/pull/3635))
|
||||
|
||||
- Fixed orphan OpenTelemetry spans during flow initialization and transitions
|
||||
in tracing.
|
||||
(PR [#3649](https://github.com/pipecat-ai/pipecat/pull/3649))
|
||||
|
||||
- Fixed `SambaNovaLLMService` and `GoogleLLMOpenAIBetaService` streams not
|
||||
being closed on cancellation/exception, which could leak sockets.
|
||||
(PR [#3663](https://github.com/pipecat-ai/pipecat/pull/3663))
|
||||
|
||||
- Fixed an issue in `InworldTTSService` where punctuation was pronounced. Now,
|
||||
the `InworldTTSService` ensures proper spacing between sentences, resolving
|
||||
pronunciation issues.
|
||||
(PR [#3667](https://github.com/pipecat-ai/pipecat/pull/3667))
|
||||
|
||||
- Fixed `ParallelPipeline` allowing frames pushed by internal processors to
|
||||
escape during lifecycle frame (`StartFrame`/`EndFrame`/`CancelFrame`)
|
||||
synchronization. These frames are now buffered and flushed after all branches
|
||||
complete.
|
||||
(PR [#3668](https://github.com/pipecat-ai/pipecat/pull/3668))
|
||||
|
||||
- Fixed issues in Sarvam STT and TTS services: missing event handler
|
||||
registration for VAD signals, `Optional[bool]` type annotations, WebSocket
|
||||
state cleanup on API errors, and TTS disconnect/reconnection state
|
||||
management.
|
||||
(PR [#3671](https://github.com/pipecat-ai/pipecat/pull/3671))
|
||||
|
||||
- Fixed `RTVIObserver` sending duplicate client messages for frames that are
|
||||
broadcast in both directions (e.g. `UserStartedSpeakingFrame`,
|
||||
`FunctionCallResultFrame`).
|
||||
(PR [#3672](https://github.com/pipecat-ai/pipecat/pull/3672))
|
||||
|
||||
- Fixed WebSocket STT services (ElevenLabs, Cartesia, Gladia, Soniox)
|
||||
disconnecting due to idle timeout when no audio is being sent (e.g. when
|
||||
inactive behind a `ServiceSwitcher`). `WebsocketSTTService` now provides
|
||||
opt-in silence-based keepalive via `keepalive_timeout` and
|
||||
`keepalive_interval` parameters.
|
||||
(PR [#3675](https://github.com/pipecat-ai/pipecat/pull/3675))
|
||||
|
||||
## [0.0.101] - 2026-01-30
|
||||
|
||||
### Added
|
||||
|
||||
- Additions for `AICFilter` and `AICVADAnalyzer`:
|
||||
- Added model downloading support to `AICFilter` with `model_id` and
|
||||
`model_download_dir` parameters.
|
||||
- Added `model_path` parameter to `AICFilter` for loading local `.aicmodel`
|
||||
files.
|
||||
- Added unit tests for `AICFilter` and `AICVADAnalyzer`.
|
||||
(PR [#3408](https://github.com/pipecat-ai/pipecat/pull/3408))
|
||||
|
||||
- Added handling for `server_content.interrupted` signal in the Gemini Live
|
||||
service for faster interruption response in the case where there isn't
|
||||
already turn tracking in the pipeline, e.g. local VAD + context aggregators.
|
||||
When there is already turn tracking in the pipeline, the additional
|
||||
interruption does no harm.
|
||||
(PR [#3429](https://github.com/pipecat-ai/pipecat/pull/3429))
|
||||
|
||||
- Added new `GenesysFrameSerializer` for the Genesys AudioHook WebSocket
|
||||
protocol, enabling bidirectional audio streaming between Pipecat pipelines
|
||||
and Genesys Cloud contact center.
|
||||
(PR [#3500](https://github.com/pipecat-ai/pipecat/pull/3500))
|
||||
|
||||
- Added `reached_upstream_types` and `reached_downstream_types` read-only
|
||||
properties to `PipelineTask` for inspecting current frame filters.
|
||||
(PR [#3510](https://github.com/pipecat-ai/pipecat/pull/3510))
|
||||
|
||||
- Added `add_reached_upstream_filter()` and `add_reached_downstream_filter()`
|
||||
methods to `PipelineTask` for appending frame types.
|
||||
(PR [#3510](https://github.com/pipecat-ai/pipecat/pull/3510))
|
||||
|
||||
- Added `UserTurnCompletionLLMServiceMixin` for LLM services to detect and
|
||||
filter incomplete user turns. When enabled via `filter_incomplete_user_turns`
|
||||
in `LLMUserAggregatorParams`, the LLM outputs a turn completion marker at the
|
||||
start of each response: ✓ (complete), ○ (incomplete short), or ◐ (incomplete
|
||||
long). Incomplete turns are suppressed, and configurable timeouts
|
||||
automatically re-prompt the user.
|
||||
(PR [#3518](https://github.com/pipecat-ai/pipecat/pull/3518))
|
||||
|
||||
- Added `FrameProcessor.broadcast_frame_instance(frame)` method to broadcast a
|
||||
frame instance by extracting its fields and creating new instances for each
|
||||
direction.
|
||||
(PR [#3519](https://github.com/pipecat-ai/pipecat/pull/3519))
|
||||
|
||||
- `PipelineTask` now automatically adds `RTVIProcessor` and registers
|
||||
`RTVIObserver` when `enable_rtvi=True` (default), simplifying pipeline setup.
|
||||
(PR [#3519](https://github.com/pipecat-ai/pipecat/pull/3519))
|
||||
|
||||
- Added `RTVIProcessor.create_rtvi_observer()` factory method for creating RTVI
|
||||
observers.
|
||||
(PR [#3519](https://github.com/pipecat-ai/pipecat/pull/3519))
|
||||
|
||||
- Added `video_out_codec` parameter to `TransportParams` allowing configuration
|
||||
of the preferred video codec (e.g., `"VP8"`, `"H264"`, `"H265"`) for video
|
||||
output in `DailyTransport`.
|
||||
(PR [#3520](https://github.com/pipecat-ai/pipecat/pull/3520))
|
||||
|
||||
- Added `location` parameter to Google TTS services (`GoogleHttpTTSService`,
|
||||
`GoogleTTSService`, `GeminiTTSService`) for regional endpoint support.
|
||||
(PR [#3523](https://github.com/pipecat-ai/pipecat/pull/3523))
|
||||
|
||||
- Added new `PIPECAT_SMART_TURN_LOG_DATA` environment variable, which causes
|
||||
Smart Turn input data to be saved to disk
|
||||
(PR [#3525](https://github.com/pipecat-ai/pipecat/pull/3525))
|
||||
|
||||
- Added `result_callback` parameter to `UserImageRequestFrame` to support
|
||||
deferred function call results.
|
||||
(PR [#3571](https://github.com/pipecat-ai/pipecat/pull/3571))
|
||||
|
||||
- Added `function_call_timeout_secs` parameter to `LLMService` to configure
|
||||
timeout for deferred function calls (defaults to 10.0 seconds).
|
||||
(PR [#3571](https://github.com/pipecat-ai/pipecat/pull/3571))
|
||||
|
||||
- Added `vad_analyzer` parameter to `LLMUserAggregatorParams`. VAD analysis is
|
||||
now handled inside the `LLMUserAggregator` rather than in the transport,
|
||||
keeping voice activity detection closer to where it is consumed. The
|
||||
`vad_analyzer` on `BaseInputTransport` is now deprecated.
|
||||
|
||||
```python
|
||||
context_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
```
|
||||
(PR [#3583](https://github.com/pipecat-ai/pipecat/pull/3583))
|
||||
|
||||
- Added `VADProcessor` for detecting speech in audio streams within a pipeline.
|
||||
Pushes `VADUserStartedSpeakingFrame`, `VADUserStoppedSpeakingFrame`, and
|
||||
`UserSpeakingFrame` downstream based on VAD state changes.
|
||||
(PR [#3583](https://github.com/pipecat-ai/pipecat/pull/3583))
|
||||
|
||||
- Added `VADController` for managing voice activity detection state and
|
||||
emitting speech events independently of transport or pipeline processors.
|
||||
(PR [#3583](https://github.com/pipecat-ai/pipecat/pull/3583))
|
||||
|
||||
- Added local `PiperTTSService` for offline text-to-speech using Piper voice
|
||||
models. The existing HTTP-based service has been renamed to
|
||||
`PiperHttpTTSService`.
|
||||
(PR [#3585](https://github.com/pipecat-ai/pipecat/pull/3585))
|
||||
|
||||
- `main()` in `pipecat.runner.run` now accepts an optional
|
||||
`argparse.ArgumentParser`, allowing bots to define custom CLI arguments
|
||||
accessible via `runner_args.cli_args`.
|
||||
(PR [#3590](https://github.com/pipecat-ai/pipecat/pull/3590))
|
||||
|
||||
- Added `KokoroTTSService` for local text-to-speech synthesis using the
|
||||
Kokoro-82M model.
|
||||
(PR [#3595](https://github.com/pipecat-ai/pipecat/pull/3595))
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated `AICFilter` and `AICVADAnalyzer` to use aic-sdk ~= 2.0.1.
|
||||
(PR [#3408](https://github.com/pipecat-ai/pipecat/pull/3408))
|
||||
|
||||
- Improved the STT TTFB (Time To First Byte) measurement, reporting the delay
|
||||
between when the user stops speaking and when the final transcription is
|
||||
received. Note: Unlike traditional TTFB which measures from a discrete
|
||||
request, STT services receive continuous audio input—so we measure from
|
||||
speech end to final transcript, which captures the latency that matters for
|
||||
voice AI applications. In support of this change, added `finalized` field to
|
||||
`TranscriptionFrame` to indicate when a transcript is the final result for an
|
||||
utterance.
|
||||
(PR [#3495](https://github.com/pipecat-ai/pipecat/pull/3495))
|
||||
|
||||
- `SarvamSTTService` now defaults `vad_signals` and `high_vad_sensitivity` to
|
||||
`None` (omitted from connection parameters), improving latency by ~300ms
|
||||
compared to the previous defaults.
|
||||
(PR [#3495](https://github.com/pipecat-ai/pipecat/pull/3495))
|
||||
|
||||
- Changed frame filter storage from tuples to sets in `PipelineTask`.
|
||||
(PR [#3510](https://github.com/pipecat-ai/pipecat/pull/3510))
|
||||
|
||||
- Changed default Inworld TTS model from `inworld-tts-1` to
|
||||
`inworld-tts-1.5-max`.
|
||||
(PR [#3531](https://github.com/pipecat-ai/pipecat/pull/3531))
|
||||
|
||||
- `FrameSerializer` now subclasses from `BaseObject` to enable event support.
|
||||
(PR [#3560](https://github.com/pipecat-ai/pipecat/pull/3560))
|
||||
|
||||
- Added support for TTFS in `SpeechmaticsSTTService` and set the default mode
|
||||
to `EXTERNAL` to support Pipecat-controlled VAD.
|
||||
- Changed dependency to `speechmatics-voice[smart]>=0.2.8`
|
||||
(PR [#3562](https://github.com/pipecat-ai/pipecat/pull/3562))
|
||||
|
||||
- ⚠️ Changed function call handling to use timeout-based completion instead of
|
||||
immediate callback execution.
|
||||
- Function calls that defer their results (e.g., `UserImageRequestFrame`)
|
||||
now use a timeout mechanism
|
||||
- The `result_callback` is invoked automatically when the deferred
|
||||
operation completes or after timeout
|
||||
- This change affects examples using `UserImageRequestFrame` - the
|
||||
`result_callback` should now be passed to the frame instead of being called
|
||||
immediately
|
||||
(PR [#3571](https://github.com/pipecat-ai/pipecat/pull/3571))
|
||||
|
||||
- Pipecat runner now uses `DAILY_ROOM_URL` instead of `DAILY_SAMPLE_ROOM_URL`.
|
||||
(PR [#3582](https://github.com/pipecat-ai/pipecat/pull/3582))
|
||||
|
||||
- Updates to `GradiumSTTService`:
|
||||
- Now flushes pending transcriptions when VAD detects the user stopped
|
||||
speaking, improving response latency.
|
||||
- `GradiumSTTService` now supports `InputParams` for configuring `language`
|
||||
and `delay_in_frames` settings.
|
||||
(PR [#3587](https://github.com/pipecat-ai/pipecat/pull/3587))
|
||||
|
||||
### Deprecated
|
||||
|
||||
- ⚠️ Deprecated `vad_analyzer` parameter on `BaseInputTransport`. Pass
|
||||
`vad_analyzer` to `LLMUserAggregatorParams` instead or use `VADProcessor` in
|
||||
the pipeline.
|
||||
(PR [#3583](https://github.com/pipecat-ai/pipecat/pull/3583))
|
||||
|
||||
### Removed
|
||||
|
||||
- Removed deprecated `AICFilter` parameters: `enhancement_level`, `voice_gain`,
|
||||
`noise_gate_enable`.
|
||||
(PR [#3408](https://github.com/pipecat-ai/pipecat/pull/3408))
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue where if you were using `OpenRouterLLMService` with a Gemini
|
||||
model, it wouldn't handle multiple `"system"` messages as expected (and as we
|
||||
do in `GoogleLLMService`), which is to convert subsequent ones into `"user"`
|
||||
messages. Instead, the latest `"system"` message would overwrite the previous
|
||||
ones.
|
||||
(PR [#3406](https://github.com/pipecat-ai/pipecat/pull/3406))
|
||||
|
||||
- Transports now properly broadcast `InputTransportMessageFrame` frames both
|
||||
upstream and downstream instead of only pushing downstream.
|
||||
(PR [#3519](https://github.com/pipecat-ai/pipecat/pull/3519))
|
||||
|
||||
- Fixed `FrameProcessor.broadcast_frame()` to deep copy kwargs, preventing
|
||||
shared mutable references between the downstream and upstream frame
|
||||
instances.
|
||||
(PR [#3519](https://github.com/pipecat-ai/pipecat/pull/3519))
|
||||
|
||||
- Fixed OpenAI LLM services to emit `ErrorFrame` on completion timeout,
|
||||
enabling proper error handling and LLMSwitcher failover.
|
||||
(PR [#3529](https://github.com/pipecat-ai/pipecat/pull/3529))
|
||||
|
||||
- Fixed a logging issue where non-ASCII characters (e.g., Japanese, Chinese,
|
||||
etc.) were being unnecessarily escaped to Unicode sequences when function
|
||||
call occurred.
|
||||
(PR [#3536](https://github.com/pipecat-ai/pipecat/pull/3536))
|
||||
|
||||
- Fixed how audio tracks are synchronized inside the `AudioBufferProcessor` to
|
||||
fix timing issues where silence and audio were misaligned between user and
|
||||
bot buffers.
|
||||
(PR [#3541](https://github.com/pipecat-ai/pipecat/pull/3541))
|
||||
|
||||
- Fixed race condition in `OpenAIRealtimeBetaLLMService` that could cause an
|
||||
error when truncating the conversation.
|
||||
(PR [#3567](https://github.com/pipecat-ai/pipecat/pull/3567))
|
||||
|
||||
- Fixed an infinite loop in `WebsocketService` that blocked the event loop when
|
||||
a remote server closed the connection gracefully.
|
||||
(PR [#3574](https://github.com/pipecat-ai/pipecat/pull/3574))
|
||||
|
||||
- Fixed `LLMUserAggregator` and `LLMAssistantAggregator` not emitting pending
|
||||
transcripts via `on_user_turn_stopped` and `on_assistant_turn_stopped` events
|
||||
when the conversation ends (`EndFrame`) or is cancelled (`CancelFrame`).
|
||||
(PR [#3575](https://github.com/pipecat-ai/pipecat/pull/3575))
|
||||
|
||||
- Added missing `LiveKitRunnerArguments` and `LiveKitTransport` support in
|
||||
runner utilities to enable LiveKit transport configuration.
|
||||
(PR [#3580](https://github.com/pipecat-ai/pipecat/pull/3580))
|
||||
|
||||
- Fixed race condition in `OpenAIRealtimeLLMService` that could cause an error
|
||||
when truncating the conversation.
|
||||
(PR [#3581](https://github.com/pipecat-ai/pipecat/pull/3581))
|
||||
|
||||
- Fixed `PiperHttpTTSService` (olf `PiperTTSService`) to resample audio output
|
||||
based on the model's sample rate parsed from the WAV header.
|
||||
(PR [#3585](https://github.com/pipecat-ai/pipecat/pull/3585))
|
||||
|
||||
- Fixed `UserTurnController` to reset user turn timeout when interim
|
||||
transcriptions are received.
|
||||
(PR [#3594](https://github.com/pipecat-ai/pipecat/pull/3594))
|
||||
|
||||
- Fixed an issue in the `IVRNavigator` where the `TextFrame`s pushed had
|
||||
incorrect spacing. Now, the internal `IVRProcessor` pushes
|
||||
`AggregatedTextFrame`s when in conversation mode. This allows for controlling
|
||||
spacing of the outputted, aggregated text.
|
||||
(PR [#3604](https://github.com/pipecat-ai/pipecat/pull/3604))
|
||||
|
||||
- Fixed `GeminiLiveLLMService` transcription timeout handler not being
|
||||
scheduled by yielding to the event loop after task creation.
|
||||
(PR [#3605](https://github.com/pipecat-ai/pipecat/pull/3605))
|
||||
|
||||
## [0.0.100] - 2026-01-20
|
||||
|
||||
### Added
|
||||
|
||||
- Added Hathora service to support Hathora-hosted TTS and STT models (only
|
||||
non-streaming)
|
||||
(PR [#3169](https://github.com/pipecat-ai/pipecat/pull/3169))
|
||||
|
||||
- Added `CambTTSService`, using Camb.ai's TTS integration with MARS models
|
||||
(mars-flash, mars-pro, mars-instruct) for high-quality text-to-speech
|
||||
synthesis.
|
||||
(PR [#3349](https://github.com/pipecat-ai/pipecat/pull/3349))
|
||||
|
||||
- Added the `additional_headers` param to `WebsocketClientParams`, allowing
|
||||
`WebsocketClientTransport` to send custom headers on connect, for cases such
|
||||
as authentication.
|
||||
(PR [#3461](https://github.com/pipecat-ai/pipecat/pull/3461))
|
||||
|
||||
- Added `UserIdleController` for detecting user idle state, integrated into
|
||||
`LLMUserAggregator` and `UserTurnProcessor` via optional `user_idle_timeout`
|
||||
parameter. Emits `on_user_turn_idle` event for application-level handling.
|
||||
Deprecated `UserIdleProcessor` in favor of the new compositional approach.
|
||||
(PR [#3482](https://github.com/pipecat-ai/pipecat/pull/3482))
|
||||
|
||||
- Added `on_user_mute_started` and `on_user_mute_stopped` event handlers to
|
||||
`LLMUserAggregator` for tracking user mute state changes.
|
||||
(PR [#3490](https://github.com/pipecat-ai/pipecat/pull/3490))
|
||||
|
||||
### Changed
|
||||
|
||||
- Enhanced interruption handling in `AsyncAITTSService` by supporting
|
||||
multi-context WebSocket sessions for more robust context management.
|
||||
(PR [#3287](https://github.com/pipecat-ai/pipecat/pull/3287))
|
||||
|
||||
- Throttle `UserSpeakingFrame` to broadcast at most every 200ms instead of on
|
||||
every audio chunk, reducing frame processing overhead during user speech.
|
||||
(PR [#3483](https://github.com/pipecat-ai/pipecat/pull/3483))
|
||||
|
||||
### Deprecated
|
||||
|
||||
- For consistency with other package names, we just deprecated
|
||||
`pipecat.turns.mute` (introduced in Pipecat 0.0.99) in favor of
|
||||
`pipecat.turns.user_mute`.
|
||||
(PR [#3479](https://github.com/pipecat-ai/pipecat/pull/3479))
|
||||
|
||||
### Fixed
|
||||
|
||||
- Corrected TTFB metric calculation in `AsyncAIHttpTTSService`.
|
||||
(PR [#3287](https://github.com/pipecat-ai/pipecat/pull/3287))
|
||||
|
||||
- Fixed an issue where the "bot-llm-text" RTVI event would not fire for
|
||||
realtime (speech-to-speech) services:
|
||||
|
||||
- `AWSNovaSonicLLMService`
|
||||
- `GeminiLiveLLMService`
|
||||
- `OpenAIRealtimeLLMService`
|
||||
- `GrokRealtimeLLMService`
|
||||
|
||||
The issue was that these services weren't pushing `LLMTextFrame`s. Now
|
||||
they do.
|
||||
(PR [#3446](https://github.com/pipecat-ai/pipecat/pull/3446))
|
||||
|
||||
- Fixed an issue where `on_user_turn_stop_timeout` could fire while a user is
|
||||
talking when using `ExternalUserTurnStrategies`.
|
||||
(PR [#3454](https://github.com/pipecat-ai/pipecat/pull/3454))
|
||||
|
||||
- Fixed an issue where user turn start strategies were not being reset after a
|
||||
user turn started, causing incorrect strategy behavior.
|
||||
(PR [#3455](https://github.com/pipecat-ai/pipecat/pull/3455))
|
||||
|
||||
- Fixed `MinWordsUserTurnStartStrategy` to not aggregate transcriptions,
|
||||
preventing incorrect turn starts when words are spoken with pauses between
|
||||
them.
|
||||
(PR [#3462](https://github.com/pipecat-ai/pipecat/pull/3462))
|
||||
|
||||
- Fixed an issue where Grok Realtime would error out when running with
|
||||
SmallWebRTC transport.
|
||||
(PR [#3480](https://github.com/pipecat-ai/pipecat/pull/3480))
|
||||
|
||||
- Fixed a `Mem0MemoryService` issue where passing `async_mode: true` was
|
||||
causing an error. See
|
||||
https://docs.mem0.ai/platform/features/async-mode-default-change.
|
||||
(PR [#3484](https://github.com/pipecat-ai/pipecat/pull/3484))
|
||||
|
||||
- Fixed `AWSNovaSonicLLMService.reset_conversation()`, which would previously
|
||||
error out. Now it successfully reconnects and "rehydrates" from the context
|
||||
object.
|
||||
(PR [#3486](https://github.com/pipecat-ai/pipecat/pull/3486))
|
||||
|
||||
- Fixed `AzureTTSService` transcript formatting issues:
|
||||
- Punctuation now appears without extra spaces (e.g., "Hello!" instead of
|
||||
"Hello !")
|
||||
- CJK languages (Chinese, Japanese, Korean) no longer have unwanted spaces
|
||||
between characters
|
||||
(PR [#3489](https://github.com/pipecat-ai/pipecat/pull/3489))
|
||||
|
||||
- Fixed an issue where `UninterruptibleFrame` frames would not be preserved in
|
||||
some cases.
|
||||
(PR [#3494](https://github.com/pipecat-ai/pipecat/pull/3494))
|
||||
|
||||
- Fixed memory leak in `LiveKitTransport` when `video_in_enabled` is `False`.
|
||||
(PR [#3499](https://github.com/pipecat-ai/pipecat/pull/3499))
|
||||
|
||||
- Fixed an issue in `AIService` where unhandled exceptions in `start()`,
|
||||
`stop()`, or `cancel()` implementations would prevent `process_frame()` to
|
||||
continue and therefore `StartFrame`, `EndFrame`, or `CancelFrame` from being
|
||||
pushed downstream, causing the pipeline to not start or stop properly.
|
||||
(PR [#3503](https://github.com/pipecat-ai/pipecat/pull/3503))
|
||||
|
||||
- Moved `NVIDIATTSService` and `NVIDIASTTService` client initialization from
|
||||
constructor to `start()` for better error handling.
|
||||
(PR [#3504](https://github.com/pipecat-ai/pipecat/pull/3504))
|
||||
|
||||
- Optimized `NVIDIATTSService` to process incoming audio frames immediately.
|
||||
(PR [#3509](https://github.com/pipecat-ai/pipecat/pull/3509))
|
||||
|
||||
- Optimized `NVIDIASTTService` by removing unnecessary queue and task.
|
||||
(PR [#3509](https://github.com/pipecat-ai/pipecat/pull/3509))
|
||||
|
||||
- Fixed a `CambTTSService` issue where client was being initialized in the
|
||||
constructor which wouldn't allow for proper Pipeline error handling.
|
||||
(PR [#3511](https://github.com/pipecat-ai/pipecat/pull/3511))
|
||||
|
||||
## [0.0.99] - 2026-01-13
|
||||
|
||||
### Added
|
||||
|
||||
155
CLAUDE.md
Normal file
155
CLAUDE.md
Normal file
@@ -0,0 +1,155 @@
|
||||
# CLAUDE.md
|
||||
|
||||
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
||||
|
||||
## Project Overview
|
||||
|
||||
Pipecat is an open-source Python framework for building real-time voice and multimodal conversational AI agents. It orchestrates audio/video, AI services, transports, and conversation pipelines using a frame-based architecture.
|
||||
|
||||
## Common Commands
|
||||
|
||||
```bash
|
||||
# Setup development environment
|
||||
uv sync --group dev --all-extras --no-extra gstreamer --no-extra krisp
|
||||
|
||||
# Install pre-commit hooks
|
||||
uv run pre-commit install
|
||||
|
||||
# Run all tests
|
||||
uv run pytest
|
||||
|
||||
# Run a single test file
|
||||
uv run pytest tests/test_name.py
|
||||
|
||||
# Run a specific test
|
||||
uv run pytest tests/test_name.py::test_function_name
|
||||
|
||||
# Preview changelog
|
||||
towncrier build --draft --version Unreleased
|
||||
|
||||
# Lint and format check
|
||||
uv run ruff check
|
||||
uv run ruff format --check
|
||||
|
||||
# Update dependencies (after editing pyproject.toml)
|
||||
uv lock && uv sync
|
||||
```
|
||||
|
||||
## Architecture
|
||||
|
||||
### Frame-Based Pipeline Processing
|
||||
|
||||
All data flows as **Frame** objects through a pipeline of **FrameProcessors**:
|
||||
|
||||
```
|
||||
[Processor1] → [Processor2] → ... → [ProcessorN]
|
||||
```
|
||||
|
||||
**Key components:**
|
||||
|
||||
- **Frames** (`src/pipecat/frames/frames.py`): Data units (audio, text, video) and control signals. Flow DOWNSTREAM (input→output) or UPSTREAM (acknowledgments/errors).
|
||||
|
||||
- **FrameProcessor** (`src/pipecat/processors/frame_processor.py`): Base processing unit. Each processor receives frames, processes them, and pushes results downstream.
|
||||
|
||||
- **Pipeline** (`src/pipecat/pipeline/pipeline.py`): Chains processors together.
|
||||
|
||||
- **ParallelPipeline** (`src/pipecat/pipeline/parallel_pipeline.py`): Runs multiple pipelines in parallel.
|
||||
|
||||
- **Transports** (`src/pipecat/transports/`): Transports are frame processors used for external I/O layer (Daily WebRTC, LiveKit WebRTC, WebSocket, Local). Abstract interface via `BaseTransport`, `BaseInputTransport` and `BaseOutputTransport`.
|
||||
|
||||
- **Pipeline Task (`src/pipecat/pipeline/task.py`)**: Runs and manages a pipeline. Pipeline tasks send the first frame, `StartFrame`, to the pipeline in order for processors to know they can start processing and pushing frames. Pipeline tasks internally create a pipeline with two additional processors, a source processor before the user-defined pipeline and a sink processor at the end. Those are used for multiple things: error handling, pipeline task level events, heartbeat monitoring, etc.
|
||||
|
||||
- **Pipeline Runner (`src/pipecat/pipeline/runner.py`)**: High-level entry point for executing pipeline tasks. Handles signal management (SIGINT/SIGTERM) for graceful shutdown and optional garbage collection. Run a single pipeline task with `await runner.run(task)` or multiple concurrently with `await asyncio.gather(runner.run(task1), runner.run(task2))`.
|
||||
|
||||
- **Services** (`src/pipecat/services/`): 60+ AI provider integrations (STT, TTS, LLM, etc.). Extend base classes: `AIService`, `LLMService`, `STTService`, `TTSService`, `VisionService`.
|
||||
|
||||
- **Serializers** (`src/pipecat/serializers/`): Convert frames to/from wire formats for WebSocket transports. `FrameSerializer` base class defines `serialize()` and `deserialize()`. Telephony serializers (Twilio, Plivo, Vonage, Telnyx, Exotel, Genesys) handle provider-specific protocols and audio encoding (e.g., μ-law).
|
||||
|
||||
- **RTVI** (`src/pipecat/processors/frameworks/rtvi.py`): Real-Time Voice Interface protocol bridging clients and the pipeline. `RTVIProcessor` handles incoming client messages (text input, audio, function call results). `RTVIObserver` converts pipeline frames to outgoing messages: user/bot speaking events, transcriptions, LLM/TTS lifecycle, function calls, metrics, and audio levels.
|
||||
|
||||
- **Observers** (`src/pipecat/observers/`): Monitor frame flow without modifying the pipeline. Passed to `PipelineTask` via the `observers` parameter. Implement `on_process_frame()` and `on_push_frame()` callbacks.
|
||||
|
||||
### Important Patterns
|
||||
|
||||
- **Context Aggregation**: `LLMContext` accumulates messages for LLM calls; `UserResponse` aggregates user input
|
||||
|
||||
- **Turn Management**: Turn management is done through `LLMUserAggregator` and
|
||||
`LLMAssistantAggregator`, created with `LLMContextAggregatorPair`
|
||||
|
||||
- **User turn strategies**: Detection of when the user starts and stops speaking is done via user turn start/stop strategies. They push `UserStartedSpeakingFrame` and `UserStoppedSpeakingFrame` respectively.
|
||||
|
||||
- **Interruptions**: Interruptions are usually triggered by a user turn start strategy (e.g. `VADUserTurnStartStrategy`) but they can be triggered by other processors as well, in which case the user turn start strategies don't need to. An `InterruptionFrame` carries an optional `asyncio.Event` that is set when the frame reaches the pipeline sink. If a processor stops an `InterruptionFrame` from propagating downstream (i.e., doesn't push it), it **must** call `frame.complete()` to avoid stalling `push_interruption_task_frame_and_wait()` callers.
|
||||
|
||||
- **Uninterruptible Frames**: These are frames that will not be removed from internal queues even if there's an interruption. For example, `EndFrame` and `StopFrame`.
|
||||
|
||||
- **Events**: Most classes in Pipecat have `BaseObject` as the very base class. `BaseObject` has support for events. Events can run in the background in an async task (default) or synchronously (`sync=True`) if we want immediate action. Synchronous event handlers need to execute fast.
|
||||
|
||||
- **Async Task Management**: Always use `self.create_task(coroutine, name)` instead of raw `asyncio.create_task()`. The `TaskManager` automatically tracks tasks and cleans them up on processor shutdown. Use `await self.cancel_task(task, timeout)` for cancellation.
|
||||
|
||||
- **Error Handling**: Use `await self.push_error(msg, exception, fatal)` to push errors upstream. Services should use `fatal=False` (the default) so application code can handle errors and take action (e.g. switch to another service).
|
||||
|
||||
### Key Directories
|
||||
|
||||
| Directory | Purpose |
|
||||
|---------------------------|----------------------------------------------------|
|
||||
| `src/pipecat/frames/` | Frame definitions (100+ types) |
|
||||
| `src/pipecat/processors/` | FrameProcessor base + aggregators, filters, audio |
|
||||
| `src/pipecat/pipeline/` | Pipeline orchestration |
|
||||
| `src/pipecat/services/` | AI service integrations (60+ providers) |
|
||||
| `src/pipecat/transports/` | Transport layer (Daily, LiveKit, WebSocket, Local) |
|
||||
| `src/pipecat/serializers/`| Frame serialization for WebSocket protocols |
|
||||
| `src/pipecat/observers/` | Pipeline observers for monitoring frame flow |
|
||||
| `src/pipecat/audio/` | VAD, filters, mixers, turn detection, DTMF |
|
||||
| `src/pipecat/turns/` | User turn management |
|
||||
|
||||
## Code Style
|
||||
|
||||
- **Docstrings**: Google-style. Classes describe purpose; `__init__` has `Args:` section; dataclasses use `Parameters:` section.
|
||||
- **Linting**: Ruff (line length 100). Pre-commit hooks enforce formatting.
|
||||
- **Type hints**: Required for complex async code.
|
||||
|
||||
### Docstring Example
|
||||
|
||||
```python
|
||||
class MyService(LLMService):
|
||||
"""Description of what the service does.
|
||||
|
||||
More detailed description.
|
||||
|
||||
Event handlers available:
|
||||
|
||||
- on_connected: Called when we are connected
|
||||
|
||||
Example::
|
||||
|
||||
@service.event_handler("on_connected")
|
||||
async def on_connected(service, frame):
|
||||
...
|
||||
"""
|
||||
|
||||
def __init__(self, param1: str, **kwargs):
|
||||
"""Initialize the service.
|
||||
|
||||
Args:
|
||||
param1: Description of param1.
|
||||
**kwargs: Additional arguments passed to parent.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
```
|
||||
|
||||
## Service Implementation
|
||||
|
||||
When adding a new service:
|
||||
|
||||
1. Extend the appropriate base class (`STTService`, `TTSService`, `LLMService`, etc.)
|
||||
2. Implement required abstract methods
|
||||
3. Handle necessary frames
|
||||
4. By default, all frames should be pushed in the direction they came
|
||||
5. Push `ErrorFrame` on failures
|
||||
6. Add metrics tracking via `MetricsData` if relevant
|
||||
7. Follow the pattern of existing services in `src/pipecat/services/`
|
||||
|
||||
## Testing
|
||||
|
||||
Test utilities live in `src/pipecat/tests/utils.py`. Use `run_test()` to send frames through a pipeline and assert expected output frames in each direction. Use `SleepFrame(sleep=N)` to add delays between frames.
|
||||
|
||||
@@ -75,13 +75,13 @@ Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.yout
|
||||
| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [Hathora](https://docs.pipecat.ai/server/services/stt/hathora), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova) [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hathora](https://docs.pipecat.ai/server/services/tts/hathora), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hathora](https://docs.pipecat.ai/server/services/tts/hathora), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Resemble](https://docs.pipecat.ai/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/server/services/s2s/ultravox), |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
|
||||
| Serializers | [Exotel](https://docs.pipecat.ai/server/utilities/serializers/exotel), [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx), [Vonage](https://docs.pipecat.ai/server/utilities/serializers/vonage) |
|
||||
| Video | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [ai-coustics](https://docs.pipecat.ai/server/utilities/audio/aic-filter) |
|
||||
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
||||
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
- Added Hathora service to support Hathora-hosted TTS and STT models (only non-streaming)
|
||||
@@ -1 +0,0 @@
|
||||
- Enhanced interruption handling in `AsyncAITTSService` by supporting multi-context WebSocket sessions for more robust context management.
|
||||
@@ -1 +0,0 @@
|
||||
- Corrected TTFB metric calculation in `AsyncAIHttpTTSService`.
|
||||
@@ -1 +0,0 @@
|
||||
- Added `CambTTSService`, using Camb.ai's TTS integration with MARS models (mars-flash, mars-pro, mars-instruct) for high-quality text-to-speech synthesis.
|
||||
@@ -1,8 +0,0 @@
|
||||
- Fixed an issue where the "bot-llm-text" RTVI event would not fire for realtime (speech-to-speech) services:
|
||||
|
||||
- `AWSNovaSonicLLMService`
|
||||
- `GeminiLiveLLMService`
|
||||
- `OpenAIRealtimeLLMService`
|
||||
- `GrokRealtimeLLMService`
|
||||
|
||||
The issue was that these services weren't pushing `LLMTextFrame`s. Now they do.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed an issue where `on_user_turn_stop_timeout` could fire while a user is talking when using `ExternalUserTurnStrategies`.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed an issue where user turn start strategies were not being reset after a user turn started, causing incorrect strategy behavior.
|
||||
@@ -1 +0,0 @@
|
||||
- Added the `additional_headers` param to `WebsocketClientParams`, allowing `WebsocketClientTransport` to send custom headers on connect, for cases such as authentication.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed `MinWordsUserTurnStartStrategy` to not aggregate transcriptions, preventing incorrect turn starts when words are spoken with pauses between them.
|
||||
@@ -1 +0,0 @@
|
||||
- For consistency with other package names, we just deprecated `pipecat.turns.mute` (introduced in Pipecat 0.0.99) in favor of `pipecat.turns.user_mute`.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed an issue where Grok Realtime would error out when running with SmallWebRTC transport.
|
||||
@@ -1 +0,0 @@
|
||||
- Added `UserIdleController` for detecting user idle state, integrated into `LLMUserAggregator` and `UserTurnProcessor` via optional `user_idle_timeout` parameter. Emits `on_user_turn_idle` event for application-level handling. Deprecated `UserIdleProcessor` in favor of the new compositional approach.
|
||||
@@ -1 +0,0 @@
|
||||
- Throttle `UserSpeakingFrame` to broadcast at most every 200ms instead of on every audio chunk, reducing frame processing overhead during user speech.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed a `Mem0MemoryService` issue where passing `async_mode: true` was causing an error. See https://docs.mem0.ai/platform/features/async-mode-default-change.
|
||||
@@ -1,3 +0,0 @@
|
||||
- Fixed `AzureTTSService` transcript formatting issues:
|
||||
- Punctuation now appears without extra spaces (e.g., "Hello!" instead of "Hello !")
|
||||
- CJK languages (Chinese, Japanese, Korean) no longer have unwanted spaces between characters
|
||||
@@ -1 +0,0 @@
|
||||
- Added `on_user_mute_started` and `on_user_mute_stopped` event handlers to `LLMUserAggregator` for tracking user mute state changes.
|
||||
1
changelog/3759.performance.md
Normal file
1
changelog/3759.performance.md
Normal file
@@ -0,0 +1 @@
|
||||
- Switched `GradiumTTSService` from `InterruptibleWordTTSService` to `AudioContextWordTTSService`, eliminating websocket disconnect/reconnect on every interruption by using `client_req_id`-based multiplexing.
|
||||
1
changelog/3802.fixed.md
Normal file
1
changelog/3802.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed self-referential `pipecat-ai[local-smart-turn-v3]` dependency in `pyproject.toml` that caused Poetry 2.x to fail with a circular dependency error. The underlying packages (`transformers`, `onnxruntime`) are now listed directly in main dependencies.
|
||||
@@ -43,11 +43,12 @@ CEREBRAS_API_KEY=...
|
||||
|
||||
# Daily
|
||||
DAILY_API_KEY=...
|
||||
DAILY_SAMPLE_ROOM_URL=https://...
|
||||
DAILY_ROOM_URL=https://...
|
||||
|
||||
# Deepgram
|
||||
DEEPGRAM_API_KEY=...
|
||||
SAGEMAKER_ENDPOINT_NAME=...
|
||||
SAGEMAKER_STT_ENDPOINT_NAME=...
|
||||
SAGEMAKER_TTS_ENDPOINT_NAME=...
|
||||
|
||||
# DeepSeek
|
||||
DEEPSEEK_API_KEY=...
|
||||
@@ -156,6 +157,10 @@ PLIVO_AUTH_TOKEN=...
|
||||
# Qwen
|
||||
QWEN_API_KEY=...
|
||||
|
||||
# Resemble AI
|
||||
RESEMBLE_API_KEY=
|
||||
RESEMBLE_VOICE_UUID=
|
||||
|
||||
# Rime
|
||||
RIME_API_KEY=...
|
||||
RIME_VOICE_ID=...
|
||||
|
||||
@@ -16,7 +16,7 @@ from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.piper.tts import PiperTTSService
|
||||
from pipecat.services.piper.tts import PiperHttpTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
@@ -24,9 +24,8 @@ from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(audio_out_enabled=True),
|
||||
"twilio": lambda: FastAPIWebsocketParams(audio_out_enabled=True),
|
||||
@@ -39,7 +38,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
# Create an HTTP session
|
||||
async with aiohttp.ClientSession() as session:
|
||||
tts = PiperTTSService(
|
||||
tts = PiperHttpTTSService(
|
||||
base_url=os.getenv("PIPER_BASE_URL"), aiohttp_session=session, sample_rate=24000
|
||||
)
|
||||
|
||||
|
||||
@@ -23,9 +23,8 @@ from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(audio_out_enabled=True),
|
||||
"twilio": lambda: FastAPIWebsocketParams(audio_out_enabled=True),
|
||||
|
||||
@@ -23,9 +23,8 @@ from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(audio_out_enabled=True),
|
||||
"twilio": lambda: FastAPIWebsocketParams(audio_out_enabled=True),
|
||||
|
||||
@@ -23,9 +23,8 @@ from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(audio_out_enabled=True),
|
||||
"twilio": lambda: FastAPIWebsocketParams(audio_out_enabled=True),
|
||||
|
||||
@@ -25,9 +25,8 @@ from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(audio_out_enabled=True),
|
||||
"twilio": lambda: FastAPIWebsocketParams(audio_out_enabled=True),
|
||||
|
||||
@@ -23,9 +23,8 @@ from pipecat.transports.daily.transport import DailyParams
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
video_out_enabled=True,
|
||||
|
||||
@@ -22,9 +22,8 @@ from pipecat.transports.daily.transport import DailyParams
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
video_out_enabled=True,
|
||||
|
||||
@@ -17,9 +17,7 @@ from fastapi.responses import RedirectResponse
|
||||
from loguru import logger
|
||||
from pipecat_ai_small_webrtc_prebuilt.frontend import SmallWebRTCPrebuiltUI
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -35,8 +33,6 @@ from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.smallwebrtc.connection import IceServer, SmallWebRTCConnection
|
||||
from pipecat.transports.smallwebrtc.transport import SmallWebRTCTransport
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -64,7 +60,6 @@ async def run_example(webrtc_connection: SmallWebRTCConnection):
|
||||
params=TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
)
|
||||
|
||||
@@ -87,11 +82,7 @@ async def run_example(webrtc_connection: SmallWebRTCConnection):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -12,9 +12,7 @@ import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -28,8 +26,6 @@ from pipecat.runner.daily import configure
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.daily.transport import DailyParams, DailyTransport
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -49,7 +45,6 @@ async def main():
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
)
|
||||
|
||||
@@ -70,13 +65,7 @@ async def main():
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[
|
||||
TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())
|
||||
]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -12,9 +12,7 @@ import sys
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import (
|
||||
InterruptionFrame,
|
||||
TranscriptionFrame,
|
||||
@@ -35,8 +33,6 @@ from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.livekit.transport import LiveKitParams, LiveKitTransport
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -54,7 +50,6 @@ async def main():
|
||||
params=LiveKitParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
)
|
||||
|
||||
@@ -80,11 +75,7 @@ async def main():
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -65,9 +65,8 @@ class MonthPrepender(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_out_enabled=True,
|
||||
|
||||
@@ -9,9 +9,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import Frame, LLMRunFrame, MetricsFrame
|
||||
from pipecat.metrics.metrics import (
|
||||
LLMUsageMetricsData,
|
||||
@@ -36,8 +34,6 @@ from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -62,24 +58,20 @@ class MetricsLogger(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -108,11 +100,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -10,9 +10,7 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from PIL import Image
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import (
|
||||
BotStartedSpeakingFrame,
|
||||
BotStoppedSpeakingFrame,
|
||||
@@ -36,8 +34,6 @@ from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -77,9 +73,8 @@ class ImageSyncAggregator(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
@@ -87,7 +82,6 @@ transport_params = {
|
||||
video_out_enabled=True,
|
||||
video_out_width=1024,
|
||||
video_out_height=1024,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
@@ -95,7 +89,6 @@ transport_params = {
|
||||
video_out_enabled=True,
|
||||
video_out_width=1024,
|
||||
video_out_height=1024,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -122,11 +115,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
image_sync_aggregator = ImageSyncAggregator(
|
||||
|
||||
@@ -9,9 +9,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -29,30 +27,24 @@ from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -79,11 +71,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -9,9 +9,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -29,29 +27,23 @@ from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -78,11 +70,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -33,9 +33,8 @@ from pipecat.turns.user_turn_strategies import ExternalUserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
|
||||
@@ -10,9 +10,7 @@ import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -32,29 +30,23 @@ from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -119,13 +111,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[
|
||||
TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())
|
||||
]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -15,9 +15,7 @@ from langchain_core.runnables.history import RunnableWithMessageHistory
|
||||
from langchain_openai import ChatOpenAI
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMMessagesUpdateFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -35,8 +33,6 @@ from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -50,24 +46,20 @@ def get_session_history(session_id: str) -> BaseChatMessageHistory:
|
||||
return message_store[session_id]
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -105,11 +97,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext()
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -32,9 +32,8 @@ from pipecat.turns.user_turn_strategies import ExternalUserTurnStrategies
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
|
||||
@@ -11,9 +11,7 @@ import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -31,30 +29,24 @@ from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -83,13 +75,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[
|
||||
TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())
|
||||
]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -10,9 +10,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -26,34 +24,28 @@ from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.aws.llm import AWSBedrockLLMService
|
||||
from pipecat.services.deepgram.stt_sagemaker import DeepgramSageMakerSTTService
|
||||
from pipecat.services.deepgram.tts import DeepgramTTSService
|
||||
from pipecat.services.deepgram.tts_sagemaker import DeepgramSageMakerTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -66,11 +58,19 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
# - AWS credentials configured (via environment variables or AWS CLI)
|
||||
# - A deployed SageMaker endpoint with Deepgram model
|
||||
stt = DeepgramSageMakerSTTService(
|
||||
endpoint_name=os.getenv("SAGEMAKER_ENDPOINT_NAME"),
|
||||
endpoint_name=os.getenv("SAGEMAKER_STT_ENDPOINT_NAME"),
|
||||
region=os.getenv("AWS_REGION"),
|
||||
)
|
||||
|
||||
tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY"), voice="aura-2-andromeda-en")
|
||||
# Initialize Deepgram SageMaker TTS Service
|
||||
# This requires:
|
||||
# - AWS credentials configured (via environment variables or AWS CLI)
|
||||
# - A deployed SageMaker endpoint with Deepgram TTS model
|
||||
tts = DeepgramSageMakerTTSService(
|
||||
endpoint_name=os.getenv("SAGEMAKER_TTS_ENDPOINT_NAME"),
|
||||
region=os.getenv("AWS_REGION"),
|
||||
voice="aura-2-andromeda-en",
|
||||
)
|
||||
|
||||
llm = AWSBedrockLLMService(
|
||||
aws_region=os.getenv("AWS_REGION"),
|
||||
@@ -88,11 +88,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -33,9 +33,8 @@ from pipecat.turns.user_turn_strategies import ExternalUserTurnStrategies
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
|
||||
@@ -10,9 +10,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -30,30 +28,24 @@ from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -77,11 +69,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -11,9 +11,7 @@ import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -31,30 +29,24 @@ from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -87,13 +79,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[
|
||||
TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())
|
||||
]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -10,9 +10,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -30,30 +28,24 @@ from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -80,11 +72,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -10,9 +10,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -30,29 +28,23 @@ from pipecat.services.playht.tts import PlayHTHttpTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -80,11 +72,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -10,9 +10,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -31,29 +29,23 @@ from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -82,11 +74,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -10,9 +10,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -30,29 +28,23 @@ from pipecat.services.azure.tts import AzureHttpTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -86,11 +78,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -10,9 +10,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -30,29 +28,23 @@ from pipecat.services.azure.tts import AzureTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -86,11 +78,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
127
examples/foundational/07g-interruptible-openai-http.py
Normal file
127
examples/foundational/07g-interruptible-openai-http.py
Normal file
@@ -0,0 +1,127 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.openai.stt import OpenAISTTService
|
||||
from pipecat.services.openai.tts import OpenAITTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = OpenAISTTService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o-transcribe",
|
||||
prompt="Expect words related to dogs, such as breed names.",
|
||||
)
|
||||
|
||||
tts = OpenAITTSService(api_key=os.getenv("OPENAI_API_KEY"), voice="ballad")
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are very knowledgable about dogs. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
user_aggregator, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
assistant_aggregator, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
audio_out_sample_rate=24000,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
@@ -10,9 +10,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -25,34 +23,29 @@ from pipecat.processors.aggregators.llm_response_universal import (
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.openai.stt import OpenAISTTService
|
||||
from pipecat.services.openai.stt import OpenAIRealtimeSTTService
|
||||
from pipecat.services.openai.tts import OpenAITTSService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -60,10 +53,15 @@ transport_params = {
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = OpenAISTTService(
|
||||
stt = OpenAIRealtimeSTTService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o-transcribe",
|
||||
prompt="Expect words related to dogs, such as breed names.",
|
||||
language=Language.EN,
|
||||
# Uses local VAD by default.
|
||||
# To enable server-side VAD, set turn_detection=None or
|
||||
# a dict with server_vad settings.
|
||||
# turn_detection={"type": "server_vad", "threshold": 0.5},
|
||||
)
|
||||
|
||||
tts = OpenAITTSService(api_key=os.getenv("OPENAI_API_KEY"), voice="ballad")
|
||||
@@ -80,11 +78,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -11,9 +11,7 @@ import time
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -31,29 +29,23 @@ from pipecat.services.openpipe.llm import OpenPipeLLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -85,11 +77,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -11,9 +11,7 @@ import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -31,29 +29,23 @@ from pipecat.services.xtts.tts import XTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -83,13 +75,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[
|
||||
TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())
|
||||
]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -11,7 +11,6 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -35,24 +34,20 @@ from pipecat.turns.user_turn_strategies import ExternalUserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -88,7 +83,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(user_turn_strategies=ExternalUserTurnStrategies()),
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=ExternalUserTurnStrategies(),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -10,9 +10,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -32,29 +30,23 @@ from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -89,11 +81,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -10,10 +10,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -31,29 +28,23 @@ from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -77,11 +68,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -10,9 +10,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -30,29 +28,23 @@ from pipecat.services.groq.tts import GroqTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -78,11 +70,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMMessagesAppendFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -27,8 +26,6 @@ from pipecat.services.aws.tts import AWSPollyTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
# Strands agent setup
|
||||
try:
|
||||
@@ -41,24 +38,20 @@ except ImportError:
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -122,11 +115,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext()
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -8,9 +8,7 @@
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -28,29 +26,23 @@ from pipecat.services.aws.tts import AWSPollyTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -82,11 +74,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -25,9 +25,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -45,15 +43,11 @@ from pipecat.services.google.tts import GoogleTTSService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
@@ -61,7 +55,6 @@ transport_params = {
|
||||
video_out_enabled=True,
|
||||
video_out_width=1024,
|
||||
video_out_height=1024,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
@@ -69,7 +62,6 @@ transport_params = {
|
||||
video_out_enabled=True,
|
||||
video_out_width=1024,
|
||||
video_out_height=1024,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -104,11 +96,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -10,9 +10,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -31,29 +29,23 @@ from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -109,11 +101,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -10,9 +10,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -31,29 +29,23 @@ from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -92,11 +84,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -10,9 +10,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -31,29 +29,23 @@ from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -92,11 +84,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -10,9 +10,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -30,30 +28,24 @@ from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -82,11 +74,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -28,9 +28,8 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.filters.krisp_viva_filter import KrispVivaFilter
|
||||
from pipecat.audio.turn.krisp_viva_turn import KrispTurnParams, KrispVivaTurn
|
||||
from pipecat.audio.turn.krisp_viva_turn import KrispVivaTurn
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -48,31 +47,25 @@ from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
audio_in_filter=KrispVivaFilter(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
audio_in_filter=KrispVivaFilter(),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
audio_in_filter=KrispVivaFilter(),
|
||||
),
|
||||
}
|
||||
@@ -101,6 +94,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=KrispVivaTurn())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -11,9 +11,7 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.filters.krisp_filter import KrispFilter
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -31,31 +29,25 @@ from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
audio_in_filter=KrispFilter(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
audio_in_filter=KrispFilter(),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
audio_in_filter=KrispFilter(),
|
||||
),
|
||||
}
|
||||
@@ -80,11 +72,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -11,9 +11,7 @@ import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -31,30 +29,24 @@ from pipecat.services.rime.tts import RimeHttpTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -85,13 +77,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[
|
||||
TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())
|
||||
]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -10,9 +10,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -30,29 +28,23 @@ from pipecat.services.rime.tts import RimeTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -64,7 +56,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
tts = RimeTTSService(
|
||||
api_key=os.getenv("RIME_API_KEY", ""),
|
||||
voice_id="rex",
|
||||
voice_id="luna",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
@@ -79,11 +71,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -10,9 +10,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -30,29 +28,23 @@ from pipecat.services.nvidia.tts import NvidiaTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -78,11 +70,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -12,9 +12,7 @@ from dotenv import load_dotenv
|
||||
from google.genai.types import Content, Part
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
InputAudioRawFrame,
|
||||
@@ -44,8 +42,6 @@ from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -100,7 +96,7 @@ class UserAudioCollector(FrameProcessor):
|
||||
self._user_speaking = True
|
||||
elif isinstance(frame, UserStoppedSpeakingFrame):
|
||||
self._user_speaking = False
|
||||
self._context.add_audio_frames_message(audio_frames=self._audio_frames)
|
||||
await self._context.add_audio_frames_message(audio_frames=self._audio_frames)
|
||||
await self._user_context_aggregator.push_frame(LLMRunFrame())
|
||||
|
||||
elif isinstance(frame, InputAudioRawFrame):
|
||||
@@ -197,24 +193,20 @@ class TranscriptionContextFixup(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -251,11 +243,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
audio_collector = UserAudioCollector(context, user_aggregator)
|
||||
pull_transcript_out_of_llm_output = TranscriptExtractor(context)
|
||||
|
||||
@@ -10,9 +10,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -30,30 +28,24 @@ from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -80,11 +72,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -11,9 +11,7 @@ import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -31,30 +29,24 @@ from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -84,13 +76,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[
|
||||
TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())
|
||||
]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -10,9 +10,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -30,29 +28,23 @@ from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -79,11 +71,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -10,9 +10,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -30,30 +28,24 @@ from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -82,11 +74,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -11,9 +11,7 @@ import sys
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -27,8 +25,6 @@ from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.local.audio import LocalAudioTransport, LocalAudioTransportParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -41,7 +37,6 @@ async def main():
|
||||
LocalAudioTransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
)
|
||||
)
|
||||
|
||||
@@ -64,11 +59,7 @@ async def main():
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -11,9 +11,7 @@ import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -32,30 +30,24 @@ from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -86,13 +78,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[
|
||||
TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())
|
||||
]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -11,9 +11,7 @@ import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -32,30 +30,24 @@ from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -88,13 +80,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[
|
||||
TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())
|
||||
]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -9,9 +9,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -29,30 +27,24 @@ from pipecat.services.sarvam.tts import SarvamTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -82,11 +74,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -10,9 +10,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -26,12 +24,11 @@ from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.soniox.stt import SonioxSTTService
|
||||
from pipecat.services.soniox.stt import SonioxInputParams, SonioxSTTService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -39,17 +36,14 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -59,6 +53,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
stt = SonioxSTTService(
|
||||
api_key=os.getenv("SONIOX_API_KEY"),
|
||||
params=SonioxInputParams(
|
||||
language_hints=[Language.EN],
|
||||
language_hints_strict=True,
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
@@ -78,11 +76,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -10,9 +10,7 @@ import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSTextFrame
|
||||
from pipecat.observers.loggers.debug_log_observer import DebugLogObserver, FrameEndpoint
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -23,7 +21,6 @@ from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.processors.frameworks.rtvi import RTVIObserver, RTVIProcessor
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
@@ -33,8 +30,6 @@ from pipecat.transports.base_output import BaseOutputTransport
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -42,17 +37,14 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -84,21 +76,12 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[
|
||||
TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())
|
||||
]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
rtvi = RTVIProcessor()
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
rtvi,
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
@@ -115,7 +98,6 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
observers=[
|
||||
RTVIObserver(rtvi),
|
||||
DebugLogObserver(
|
||||
frame_types={
|
||||
TTSTextFrame: (BaseOutputTransport, FrameEndpoint.SOURCE),
|
||||
|
||||
@@ -9,9 +9,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSTextFrame
|
||||
from pipecat.observers.loggers.debug_log_observer import DebugLogObserver, FrameEndpoint
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -22,7 +20,6 @@ from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIObserver, RTVIProcessor
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
@@ -32,8 +29,6 @@ from pipecat.transports.base_output import BaseOutputTransport
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -42,17 +37,14 @@ transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -81,19 +73,12 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
rtvi = RTVIProcessor(config=RTVIConfig(config=[]))
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
rtvi,
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
@@ -110,7 +95,6 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
observers=[
|
||||
RTVIObserver(rtvi),
|
||||
DebugLogObserver(
|
||||
frame_types={
|
||||
TTSTextFrame: (BaseOutputTransport, FrameEndpoint.SOURCE),
|
||||
|
||||
@@ -11,9 +11,7 @@ import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -31,30 +29,24 @@ from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -84,13 +76,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[
|
||||
TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())
|
||||
]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -10,9 +10,7 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -30,30 +28,24 @@ from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -80,11 +72,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -13,7 +13,6 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.filters.aic_filter import AICFilter
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -32,56 +31,42 @@ from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# Create audio buffer processor so we can hear the audio fitler results.
|
||||
audiobuffer = AudioBufferProcessor(
|
||||
num_channels=2, # 1 for mono, 2 for stereo (user left, bot right)
|
||||
enable_turn_audio=False, # Enable per-turn audio recording
|
||||
)
|
||||
|
||||
|
||||
def _create_aic_filter() -> AICFilter:
|
||||
license_key = os.getenv("AICOUSTICS_LICENSE_KEY", "")
|
||||
|
||||
return AICFilter(
|
||||
license_key=license_key,
|
||||
enhancement_level=0.5,
|
||||
model_id="quail-vf-l-16khz",
|
||||
)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
aic_filter = _create_aic_filter()
|
||||
aic_vad_analyzer = aic_filter.create_vad_analyzer(
|
||||
speech_hold_duration=0.05, minimum_speech_duration=0.0, sensitivity=6.0
|
||||
)
|
||||
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: (
|
||||
lambda aic: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=aic.create_vad_analyzer(lookback_buffer_size=6.0, sensitivity=6.0),
|
||||
audio_in_filter=aic,
|
||||
)
|
||||
)(_create_aic_filter()),
|
||||
"twilio": lambda: (
|
||||
lambda aic: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=aic.create_vad_analyzer(lookback_buffer_size=6.0, sensitivity=6.0),
|
||||
audio_in_filter=aic,
|
||||
)
|
||||
)(_create_aic_filter()),
|
||||
"webrtc": lambda: (
|
||||
lambda aic: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=aic.create_vad_analyzer(lookback_buffer_size=6.0, sensitivity=6.0),
|
||||
audio_in_filter=aic,
|
||||
)
|
||||
)(_create_aic_filter()),
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
audio_in_filter=aic_filter,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
audio_in_filter=aic_filter,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
audio_in_filter=aic_filter,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -107,11 +92,13 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
),
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=aic_vad_analyzer),
|
||||
)
|
||||
|
||||
# Create audio buffer processor so we can hear the audio fitler results.
|
||||
audiobuffer = AudioBufferProcessor(
|
||||
num_channels=2, # 1 for mono, 2 for stereo (user left, bot right)
|
||||
enable_turn_audio=False, # Enable per-turn audio recording
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user