Compare commits
368 Commits
hush/recor
...
hush/rtviS
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fd6379cb6a | ||
|
|
7618d7511a | ||
|
|
9ca7bad978 | ||
|
|
10289c1f1c | ||
|
|
213d5d6abc | ||
|
|
01f37f769d | ||
|
|
52b393537a | ||
|
|
a6a4d3d71f | ||
|
|
c52de0f5de | ||
|
|
a1e1255f16 | ||
|
|
c4f758725e | ||
|
|
7bc9a78ce6 | ||
|
|
f8be71b32c | ||
|
|
957fa5546d | ||
|
|
039cb8fcae | ||
|
|
8e05f2f1a1 | ||
|
|
8467aa1ed3 | ||
|
|
9c5878af3d | ||
|
|
ef29800fe9 | ||
|
|
7e09933070 | ||
|
|
82a9d7f992 | ||
|
|
facbebb15f | ||
|
|
2ba60fc41f | ||
|
|
685f951ae2 | ||
|
|
27d4c927a8 | ||
|
|
20a59e8c56 | ||
|
|
d9a0a93667 | ||
|
|
154d5d1859 | ||
|
|
a192217256 | ||
|
|
10cdc47e05 | ||
|
|
2b4d41a548 | ||
|
|
962f8062a5 | ||
|
|
d80d385b2f | ||
|
|
b347ca472f | ||
|
|
c3c4952abf | ||
|
|
f369ab4c1a | ||
|
|
62b41c6789 | ||
|
|
2872bc7902 | ||
|
|
9658b75a10 | ||
|
|
63de9039e6 | ||
|
|
9352396d7e | ||
|
|
d1ab1d38b7 | ||
|
|
080f70d91c | ||
|
|
ebed1fc6ea | ||
|
|
6821b1cdab | ||
|
|
144ae9b611 | ||
|
|
a2e7331ce2 | ||
|
|
8accd3e387 | ||
|
|
3d05a74dc0 | ||
|
|
e3c965f4d5 | ||
|
|
5354e5d891 | ||
|
|
5784e91cff | ||
|
|
bc5f098aaa | ||
|
|
93534b4692 | ||
|
|
b23d54c609 | ||
|
|
aa23a7b1e6 | ||
|
|
c0c41789ab | ||
|
|
029ef4f8c2 | ||
|
|
9cad6dfce9 | ||
|
|
4df6444832 | ||
|
|
944bc23135 | ||
|
|
1d863ee7de | ||
|
|
9ca775d1ab | ||
|
|
03002ad685 | ||
|
|
99a4154cbc | ||
|
|
0f68cc182d | ||
|
|
3ac50b9902 | ||
|
|
9557705b53 | ||
|
|
a7718926e9 | ||
|
|
dfa10af6ed | ||
|
|
8485ea6c5e | ||
|
|
b298376766 | ||
|
|
7cfefe4f84 | ||
|
|
71c7373987 | ||
|
|
d1086914fe | ||
|
|
2fb85941d3 | ||
|
|
be8788e4da | ||
|
|
acb6abd761 | ||
|
|
a528aad957 | ||
|
|
5c13252801 | ||
|
|
a4422ac6c2 | ||
|
|
985a031353 | ||
|
|
5c4079b286 | ||
|
|
5489ac5a73 | ||
|
|
49fbcc86ac | ||
|
|
d20c3307b9 | ||
|
|
9fd76923fd | ||
|
|
a753a623d4 | ||
|
|
4ee6c4b59e | ||
|
|
e79a002e5a | ||
|
|
420912dd4b | ||
|
|
de7185e8db | ||
|
|
8bfcfe8b1d | ||
|
|
26d2ce5926 | ||
|
|
9ee56bff9e | ||
|
|
2e0d77e4f0 | ||
|
|
4cc8a4312c | ||
|
|
cb7cb381aa | ||
|
|
b29ffeef29 | ||
|
|
b7b2a5b7a1 | ||
|
|
3384598e07 | ||
|
|
c420dbe57f | ||
|
|
fa8aafc7a5 | ||
|
|
4b364dda29 | ||
|
|
6bb765e40f | ||
|
|
c80d09f66c | ||
|
|
f8ff10c5d5 | ||
|
|
09ff836ef6 | ||
|
|
e446ecac14 | ||
|
|
8c0c8a6153 | ||
|
|
70033ae00b | ||
|
|
ac9dce63ae | ||
|
|
8b2df48fab | ||
|
|
156a5690fc | ||
|
|
d42c618398 | ||
|
|
b23ca5a4a8 | ||
|
|
63a6697a90 | ||
|
|
f1e45d0f02 | ||
|
|
4ad227ca2d | ||
|
|
66cc18194b | ||
|
|
7d65132c93 | ||
|
|
db7d7a4204 | ||
|
|
7bbac11084 | ||
|
|
76c8322b57 | ||
|
|
7b1cd3523d | ||
|
|
6bd821ac9a | ||
|
|
a6d51c343e | ||
|
|
1a5cf7a521 | ||
|
|
69491417ec | ||
|
|
b91780ced2 | ||
|
|
8ded666958 | ||
|
|
2490c804a5 | ||
|
|
dd8856a673 | ||
|
|
e7da08dab1 | ||
|
|
ae60d42016 | ||
|
|
50e8d82ece | ||
|
|
cc9901a82f | ||
|
|
1fd43e8a3f | ||
|
|
fdc508a1a5 | ||
|
|
37269db247 | ||
|
|
51269aabbd | ||
|
|
74ecc19e09 | ||
|
|
c6d48c16df | ||
|
|
873d84aa09 | ||
|
|
7360866c97 | ||
|
|
81f4768661 | ||
|
|
972d65f61b | ||
|
|
1da9d398e3 | ||
|
|
7358bc6428 | ||
|
|
a6af499f84 | ||
|
|
f9d1a53e28 | ||
|
|
3f3010af79 | ||
|
|
a02d47ddbd | ||
|
|
a649aff3e7 | ||
|
|
a9b551d73e | ||
|
|
747a821943 | ||
|
|
010db3ccd5 | ||
|
|
db773b8b93 | ||
|
|
16b7bf71b4 | ||
|
|
82d19508a4 | ||
|
|
dc3646f0e7 | ||
|
|
62e659cd3a | ||
|
|
b2945f44fd | ||
|
|
618fbef81c | ||
|
|
70c42dfa6e | ||
|
|
9ab374dd1f | ||
|
|
cc6d284417 | ||
|
|
f77d8f0b6f | ||
|
|
9c0beb05cf | ||
|
|
858981c404 | ||
|
|
9eed225aa2 | ||
|
|
9f7371e485 | ||
|
|
d77c37ff14 | ||
|
|
b4916f9dae | ||
|
|
004a920920 | ||
|
|
203c5a3a60 | ||
|
|
7f6fb1754b | ||
|
|
a390ce13a4 | ||
|
|
61d31d1c40 | ||
|
|
e872ff943a | ||
|
|
c71005e249 | ||
|
|
6e06bf97c0 | ||
|
|
a80dc94e91 | ||
|
|
3ea9cfd251 | ||
|
|
a80f82cdb6 | ||
|
|
d24bab354f | ||
|
|
53ee3fb64c | ||
|
|
3599761e4e | ||
|
|
c0b3fe3985 | ||
|
|
497d48b6c8 | ||
|
|
e179916c9c | ||
|
|
b0b38beb19 | ||
|
|
8577139d21 | ||
|
|
e2fbbb4b40 | ||
|
|
88ce117e84 | ||
|
|
266537c3f4 | ||
|
|
230d2f80fa | ||
|
|
3f0688aefa | ||
|
|
5be3e6979e | ||
|
|
9c19cff818 | ||
|
|
95f3537bde | ||
|
|
7ff748defd | ||
|
|
2dafbee2aa | ||
|
|
1e0a9d7b06 | ||
|
|
4a23e138b1 | ||
|
|
384f80983f | ||
|
|
f6f01ea7e4 | ||
|
|
f385cc0460 | ||
|
|
e97de43de2 | ||
|
|
8299c96ad4 | ||
|
|
e9af585edd | ||
|
|
31f7082d12 | ||
|
|
6cea71270e | ||
|
|
d05b2d0e8d | ||
|
|
a458c1e92b | ||
|
|
5bbf1d0209 | ||
|
|
235cd9cecc | ||
|
|
829f3ed2db | ||
|
|
ac64f0ba91 | ||
|
|
ce41a7585b | ||
|
|
ce92dfb5ec | ||
|
|
ee132a2188 | ||
|
|
5f3bbf9828 | ||
|
|
55d1d81430 | ||
|
|
8e36bdbed7 | ||
|
|
cd8bd7f487 | ||
|
|
5fa47b7a5c | ||
|
|
616961b487 | ||
|
|
650d4d9ee2 | ||
|
|
2627cb6bf2 | ||
|
|
0e4115049b | ||
|
|
3ebef9346f | ||
|
|
3e2d21779f | ||
|
|
cfefcac35f | ||
|
|
57b39c084f | ||
|
|
11b6de0900 | ||
|
|
824bc9bf16 | ||
|
|
d0ddef6c12 | ||
|
|
ad40a0f076 | ||
|
|
e6325a8229 | ||
|
|
6d10732889 | ||
|
|
fdb46a0fa9 | ||
|
|
3588b06718 | ||
|
|
73874f6ec0 | ||
|
|
6ab9a8ad7f | ||
|
|
821e303249 | ||
|
|
efae26a5a8 | ||
|
|
d16ace22ac | ||
|
|
001c26b79c | ||
|
|
8dc4f1cda0 | ||
|
|
ab6be11a0e | ||
|
|
054158b0ff | ||
|
|
174cf13abd | ||
|
|
099d2c02e1 | ||
|
|
e1108466f6 | ||
|
|
edd53d425e | ||
|
|
b160cf34e9 | ||
|
|
dae3b927e1 | ||
|
|
bd3d30111a | ||
|
|
8c7e16e717 | ||
|
|
f6accbd510 | ||
|
|
8186219879 | ||
|
|
b9a2ed5b58 | ||
|
|
7ac12ffc85 | ||
|
|
f623cf96f7 | ||
|
|
06be20eb16 | ||
|
|
816b3a9545 | ||
|
|
255666925b | ||
|
|
0df065fda4 | ||
|
|
241a947b8b | ||
|
|
e28c199dd1 | ||
|
|
6220ee4efb | ||
|
|
b650d043bf | ||
|
|
121e6d2157 | ||
|
|
dbd7869de7 | ||
|
|
b7d56d5ff0 | ||
|
|
61cba0136f | ||
|
|
ed743b55d4 | ||
|
|
fb074895f5 | ||
|
|
d916865ccc | ||
|
|
6378a8ccd3 | ||
|
|
5dbb5f176b | ||
|
|
b89f2611f7 | ||
|
|
db0f783c55 | ||
|
|
20ec323647 | ||
|
|
f71c09a4fd | ||
|
|
cba4ebfcf9 | ||
|
|
3b9a8946f9 | ||
|
|
db3620c4be | ||
|
|
11338ea92d | ||
|
|
90563a4091 | ||
|
|
937f5f7cb7 | ||
|
|
4f221b817a | ||
|
|
c79c1f65fc | ||
|
|
8ad2ad0e59 | ||
|
|
499b258bf9 | ||
|
|
05b6a5ae4b | ||
|
|
65fcea28ce | ||
|
|
005c0b55b6 | ||
|
|
1828127f41 | ||
|
|
77ab841cab | ||
|
|
3bbc75110a | ||
|
|
b2ce1d9378 | ||
|
|
58714865df | ||
|
|
03b3635b0a | ||
|
|
aaa7b5e626 | ||
|
|
0b8486ce39 | ||
|
|
d4ae091ddd | ||
|
|
9e0a57a6de | ||
|
|
fc4c1e4110 | ||
|
|
9b740d9e72 | ||
|
|
b03563765f | ||
|
|
a1578bd67a | ||
|
|
6466573b84 | ||
|
|
b42dc83696 | ||
|
|
fe5931b884 | ||
|
|
4b438ff7d7 | ||
|
|
89a8c16676 | ||
|
|
c4c92585f9 | ||
|
|
af23200511 | ||
|
|
63146d6f85 | ||
|
|
ec00edc893 | ||
|
|
a21be058e2 | ||
|
|
c226c20e12 | ||
|
|
78e6669105 | ||
|
|
79f29e14dd | ||
|
|
d4a00fd080 | ||
|
|
d4186fa115 | ||
|
|
3536cbcd13 | ||
|
|
e3bcb70b13 | ||
|
|
19a82f9522 | ||
|
|
8c0a847449 | ||
|
|
e3704cd1a1 | ||
|
|
1ba037865b | ||
|
|
909520f76e | ||
|
|
d06cfcd597 | ||
|
|
2579d0cf57 | ||
|
|
1ec20b2e74 | ||
|
|
55a6e5aa4c | ||
|
|
2229730169 | ||
|
|
24b54c66ee | ||
|
|
a14205415f | ||
|
|
18b56d4a10 | ||
|
|
b85bd91d08 | ||
|
|
23f3285a7d | ||
|
|
94f6436619 | ||
|
|
480692971c | ||
|
|
5df5f6ae4c | ||
|
|
6940112ab9 | ||
|
|
80584e9138 | ||
|
|
1fd01e715d | ||
|
|
a7a1cd0cde | ||
|
|
e5a6b9d2b4 | ||
|
|
169b50af61 | ||
|
|
31311d8ac5 | ||
|
|
bfd06b321d | ||
|
|
3efbcab39c | ||
|
|
b40ca391f5 | ||
|
|
43008c8c5b | ||
|
|
3a37b11e56 | ||
|
|
9ea81bc982 | ||
|
|
98b499e2e9 | ||
|
|
72c8f6c8c3 | ||
|
|
ea61256ddc | ||
|
|
babafadbe4 | ||
|
|
a5660f6dc7 | ||
|
|
20a1dd066d | ||
|
|
c9f7882728 |
87
.github/ISSUE_TEMPLATE/1-bug_report.yml
vendored
Normal file
@@ -0,0 +1,87 @@
|
||||
name: Bug report
|
||||
description: Report a bug or unexpected behavior
|
||||
type: Bug
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Bug Report
|
||||
|
||||
Thank you for taking the time to fill out this bug report.
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
### Environment
|
||||
|
||||
- type: input
|
||||
id: pipecat-version
|
||||
attributes:
|
||||
label: pipecat version
|
||||
description: Which version are you using?
|
||||
placeholder: e.g., 0.0.63
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: python-version
|
||||
attributes:
|
||||
label: Python version
|
||||
description: Which Python version are you using?
|
||||
placeholder: e.g., 3.12.8
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: os
|
||||
attributes:
|
||||
label: Operating System
|
||||
description: Which OS are you using?
|
||||
placeholder: e.g., Ubuntu 24.04, Windows 11, macOS 12.5
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: description
|
||||
attributes:
|
||||
label: Issue description
|
||||
description: Provide a clear description of the issue.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: repro
|
||||
attributes:
|
||||
label: Reproduction steps
|
||||
description: List the steps to reproduce the issue.
|
||||
placeholder: |
|
||||
1. Do this...
|
||||
2. Then do that...
|
||||
3. Observe the error...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: expected
|
||||
attributes:
|
||||
label: Expected behavior
|
||||
description: What did you expect to happen?
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: actual
|
||||
attributes:
|
||||
label: Actual behavior
|
||||
description: What actually happened?
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Logs
|
||||
description: If applicable, include any relevant logs or error messages
|
||||
render: shell
|
||||
validations:
|
||||
required: false
|
||||
67
.github/ISSUE_TEMPLATE/2-question.yml
vendored
Normal file
@@ -0,0 +1,67 @@
|
||||
name: Question
|
||||
description: Ask a question or get help
|
||||
type: Question
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Question
|
||||
|
||||
Use this form to ask a question about pipecat.
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
### Environment (if applicable)
|
||||
|
||||
- type: input
|
||||
id: pipecat-version
|
||||
attributes:
|
||||
label: pipecat version
|
||||
description: Which version are you using? (if applicable)
|
||||
placeholder: e.g., 0.0.63
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: input
|
||||
id: python-version
|
||||
attributes:
|
||||
label: Python version
|
||||
description: Which Python version are you using? (if applicable)
|
||||
placeholder: e.g., 3.12.8
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: input
|
||||
id: os
|
||||
attributes:
|
||||
label: Operating System
|
||||
description: Which OS are you using? (if applicable)
|
||||
placeholder: e.g., Ubuntu 24.04, Windows 11, macOS 12.5
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: textarea
|
||||
id: question
|
||||
attributes:
|
||||
label: Question
|
||||
description: Provide your question in detail here.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: tried
|
||||
attributes:
|
||||
label: What I've tried
|
||||
description: Describe what you've already tried or research you've done.
|
||||
placeholder: I've looked at the documentation and tried...
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: textarea
|
||||
id: context
|
||||
attributes:
|
||||
label: Context
|
||||
description: Any additional context or information that might help others understand your question better.
|
||||
validations:
|
||||
required: false
|
||||
52
.github/ISSUE_TEMPLATE/3-feature_request.yml
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
name: Feature request
|
||||
description: Suggest an enhancement or new feature
|
||||
type: Enhancement
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Feature Request
|
||||
|
||||
Thank you for suggesting an enhancement to pipecat.
|
||||
|
||||
- type: textarea
|
||||
id: problem
|
||||
attributes:
|
||||
label: Problem Statement
|
||||
description: A clear description of the problem this feature would solve.
|
||||
placeholder: I'm always frustrated when...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: solution
|
||||
attributes:
|
||||
label: Proposed Solution
|
||||
description: A clear and concise description of what you want to happen.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: alternatives
|
||||
attributes:
|
||||
label: Alternative Solutions
|
||||
description: Any alternative solutions or features you've considered.
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: textarea
|
||||
id: context
|
||||
attributes:
|
||||
label: Additional Context
|
||||
description: Add any other context, mockups, or screenshots about the feature request here.
|
||||
placeholder: You can drag and drop images here to include them.
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: checkboxes
|
||||
id: contribution
|
||||
attributes:
|
||||
label: Would you be willing to help implement this feature?
|
||||
options:
|
||||
- label: Yes, I'd like to contribute
|
||||
- label: No, I'm just suggesting
|
||||
82
.github/ISSUE_TEMPLATE/4-service-issue.yml
vendored
Normal file
@@ -0,0 +1,82 @@
|
||||
name: Service Issue
|
||||
description: An issue with a third-party service
|
||||
type: Service Issue
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Service Issue
|
||||
|
||||
Use this form to report an issue with a third-party service integration.
|
||||
|
||||
- type: input
|
||||
id: pipecat-version
|
||||
attributes:
|
||||
label: pipecat version
|
||||
description: Which version are you using?
|
||||
placeholder: e.g., 0.0.63
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: service-name
|
||||
attributes:
|
||||
label: Service Name
|
||||
description: Which third-party service is having issues?
|
||||
placeholder: e.g., OpenAI, ElevenLabs, Anthropic
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: service-version
|
||||
attributes:
|
||||
label: Service or model version
|
||||
description: Which version of the service API or model are you using?
|
||||
placeholder: e.g., v1, gpt-4.1
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: textarea
|
||||
id: description
|
||||
attributes:
|
||||
label: Issue Description
|
||||
description: Provide a clear description of the service issue.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: reproduction
|
||||
attributes:
|
||||
label: Reproduction Steps
|
||||
description: Provide steps to reproduce the issue.
|
||||
placeholder: |
|
||||
1. Configure service X
|
||||
2. Call method Y
|
||||
3. See error Z
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: expected
|
||||
attributes:
|
||||
label: Expected Behavior
|
||||
description: What did you expect to happen?
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: actual
|
||||
attributes:
|
||||
label: Actual Behavior
|
||||
description: What actually happened?
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Error Logs
|
||||
description: If available, include any error messages or logs.
|
||||
render: shell
|
||||
validations:
|
||||
required: false
|
||||
56
.github/ISSUE_TEMPLATE/5-new-service.yml
vendored
Normal file
@@ -0,0 +1,56 @@
|
||||
name: New Service
|
||||
description: Request to support a new third-party service
|
||||
type: New Service
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## New Service Request
|
||||
|
||||
Use this form to request support for a new third-party service in pipecat.
|
||||
|
||||
- type: input
|
||||
id: service-name
|
||||
attributes:
|
||||
label: Service Name
|
||||
description: What is the name of the third-party service?
|
||||
placeholder: e.g., NewAPI, SomeService
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: service-website
|
||||
attributes:
|
||||
label: Service Website
|
||||
description: Link to the service's website or documentation
|
||||
placeholder: e.g., https://newapi.com
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: service-description
|
||||
attributes:
|
||||
label: Service Description
|
||||
description: Briefly describe what this service does and how it works.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: api-info
|
||||
attributes:
|
||||
label: API Information
|
||||
description: If available, provide details about the service's API.
|
||||
placeholder: |
|
||||
- API documentation link
|
||||
- Authentication method
|
||||
- Key endpoints you'd like supported
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: checkboxes
|
||||
id: contribution
|
||||
attributes:
|
||||
label: Would you be willing to help implement this service?
|
||||
options:
|
||||
- label: Yes, I'd like to contribute
|
||||
- label: No, I'm just suggesting
|
||||
74
.github/ISSUE_TEMPLATE/6-dependency.yml
vendored
Normal file
@@ -0,0 +1,74 @@
|
||||
name: Dependency Issue
|
||||
description: An issue with a Pipecat dependency (not a third-party service)
|
||||
type: Dependency Issue
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Dependency Issue
|
||||
|
||||
Use this form to report an issue with a Pipecat dependency.
|
||||
|
||||
- type: input
|
||||
id: pipecat-version
|
||||
attributes:
|
||||
label: pipecat version
|
||||
description: Which version are you using?
|
||||
placeholder: e.g., 0.0.63
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: dependency-name
|
||||
attributes:
|
||||
label: Dependency Name
|
||||
description: Which Pipecat dependency is causing the issue?
|
||||
placeholder: e.g., openai, anthropic, fastapi
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: dependency-version
|
||||
attributes:
|
||||
label: Dependency Version
|
||||
description: Which version of the dependency are you using?
|
||||
placeholder: e.g., 1.2.3
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: description
|
||||
attributes:
|
||||
label: Issue Description
|
||||
description: Provide a clear description of the dependency issue.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: impact
|
||||
attributes:
|
||||
label: Impact
|
||||
description: How is this dependency issue affecting your usage of pipecat?
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: reproduction
|
||||
attributes:
|
||||
label: Reproduction Steps
|
||||
description: If applicable, provide steps to reproduce the issue.
|
||||
placeholder: |
|
||||
1. Install dependency X
|
||||
2. Run command Y
|
||||
3. See error Z
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Error Logs
|
||||
description: If applicable, include any relevant error messages or logs.
|
||||
render: shell
|
||||
validations:
|
||||
required: false
|
||||
70
.github/ISSUE_TEMPLATE/7-troubleshooting.yml
vendored
Normal file
@@ -0,0 +1,70 @@
|
||||
name: Troubleshooting
|
||||
description: Help with a specific use case
|
||||
type: Troubleshooting
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Troubleshooting Request
|
||||
|
||||
Use this form to get help with a specific use case or implementation.
|
||||
|
||||
- type: input
|
||||
id: pipecat-version
|
||||
attributes:
|
||||
label: pipecat version
|
||||
description: Which version are you using?
|
||||
placeholder: e.g., 0.0.63
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: python-version
|
||||
attributes:
|
||||
label: Python version
|
||||
description: Which version of Python are you using?
|
||||
placeholder: e.g., 3.12.8
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: os
|
||||
attributes:
|
||||
label: Operating System
|
||||
description: Which OS are you using?
|
||||
placeholder: e.g., Ubuntu 24.04, Windows 11, macOS 12.5
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: use-case
|
||||
attributes:
|
||||
label: Use Case Description
|
||||
description: Describe what you're trying to accomplish with pipecat.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: current-approach
|
||||
attributes:
|
||||
label: Current Approach
|
||||
description: What have you tried so far? Include code snippets if relevant.
|
||||
render: python
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: errors
|
||||
attributes:
|
||||
label: Errors or Unexpected Behavior
|
||||
description: Describe any errors or unexpected behavior you're encountering.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: additional-context
|
||||
attributes:
|
||||
label: Additional Context
|
||||
description: Any other information that might help us understand your situation.
|
||||
validations:
|
||||
required: false
|
||||
1
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
@@ -0,0 +1 @@
|
||||
blank_issues_enabled: false
|
||||
30
.gitignore
vendored
@@ -7,7 +7,7 @@ venv
|
||||
/.idea
|
||||
#*#
|
||||
|
||||
# Distribution / packaging
|
||||
# Distribution / Packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
@@ -30,24 +30,24 @@ MANIFEST
|
||||
.env
|
||||
fly.toml
|
||||
|
||||
# Example files
|
||||
pipecat/examples/twilio-chatbot/templates/streams.xml
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/node_modules/
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/.expo/
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/dist/
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/npm-debug.*
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.jks
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.p8
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.p12
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.key
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.mobileprovision
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.orig.*
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/web-build/
|
||||
# Examples
|
||||
examples/telnyx-chatbot/templates/streams.xml
|
||||
examples/twilio-chatbot/templates/streams.xml
|
||||
examples/**/node_modules/
|
||||
examples/**/.expo/
|
||||
examples/**/dist/
|
||||
examples/**/npm-debug.*
|
||||
examples/**/*.jks
|
||||
examples/**/*.p8
|
||||
examples/**/*.p12
|
||||
examples/**/*.key
|
||||
examples/**/*.mobileprovision
|
||||
examples/**/*.orig.*
|
||||
examples/**/web-build/
|
||||
|
||||
# macOS
|
||||
.DS_Store
|
||||
|
||||
|
||||
# Documentation
|
||||
docs/api/_build/
|
||||
docs/api/api
|
||||
335
CHANGELOG.md
@@ -9,6 +9,302 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
### Added
|
||||
|
||||
- Added `RTVIObserverParams` which allows you to configure what RTVI messages
|
||||
are sent to the clients.
|
||||
|
||||
- Added a `context_window_compression` InputParam to
|
||||
`GeminiMultimodalLiveLLMService` which allows you to enable a sliding context
|
||||
window for the session as well as set the token limit of the sliding window.
|
||||
|
||||
- Updated `SmallWebRTCConnection` to support `ice_servers` with credentials.
|
||||
|
||||
- Added `VADUserStartedSpeakingFrame` and `VADUserStoppedSpeakingFrame`,
|
||||
indicating when the VAD detected that the user started and stopped speaking. These
|
||||
events are helpful when using smart turn detection, as the user's stop time
|
||||
can differ from when their turn ends (signified by UserStoppedSpeakingFrame).
|
||||
|
||||
- Added `TranslationFrame`, a new frame type that contains a translated
|
||||
transcription.
|
||||
|
||||
- Added `TransportParams.audio_in_passthrough`. If set (the default), incoming
|
||||
audio will be pushed downstream.
|
||||
|
||||
- Added `MCPClient`; a way to connect to MCP servers and use the MCP servers'
|
||||
tools.
|
||||
|
||||
- Added `Mem0 OSS` support; along with Mem0 cloud support, the OSS version is now also
|
||||
available.
|
||||
|
||||
### Changed
|
||||
|
||||
- The `STTMuteFilter` now mutes `InterimTranscriptionFrame` and
|
||||
`TranscriptionFrame` which allows the `STTMuteFilter` to be used in
|
||||
conjunction with transports that generate transcripts, e.g. `DailyTransport`.
|
||||
|
||||
- Function calls now receive a single parameter `FunctionCallParams` instead of
|
||||
`(function_name, tool_call_id, args, llm, context, result_callback)` which is
|
||||
now deprecated.
|
||||
|
||||
- Changed the user aggregator timeout for late transcriptions from 1.0s to 0.5s
|
||||
(`LLMUserAggregatorParams.aggregation_timeout`). Sometimes, the STT services
|
||||
might give us more than one transcription which could come after the user
|
||||
stopped speaking. We still want to include these additional transcriptions
|
||||
with the first one because it's part of the user turn. This is what this
|
||||
timeout is helpful with.
|
||||
|
||||
- Short utterances not detected by VAD while the bot is speaking are now
|
||||
ignored. This reduces the number of bot interruptions significantly, providing
|
||||
a more natural conversation experience.
|
||||
|
||||
- Updated `GladiaSTTService` to output a `TranslationFrame` when specifying a
|
||||
`translation` and `translation_config`.
|
||||
|
||||
- STT services now pass through audio frames by default. This allows you to add
|
||||
audio recording without worrying about what's wrong in your pipeline when it
|
||||
doesn't work the first time.
|
||||
|
||||
- Input transports now always push audio downstream unless disabled with
|
||||
`TransportParams.audio_in_passthrough`. After many Pipecat releases, we
|
||||
realized this is the common use case. There are use cases where the input
|
||||
transport already provides STT and you also don't want recordings, in which
|
||||
case there's no need to push audio to the rest of the pipeline, but this is
|
||||
not a very common case.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- Function calls with parameters
|
||||
`(function_name, tool_call_id, args, llm, context, result_callback)` are
|
||||
deprecated, use a single `FunctionCallParams` parameter instead.
|
||||
|
||||
- `TransportParams.camera_*` parameters are now deprecated, use
|
||||
`TransportParams.video_*` instead.
|
||||
|
||||
- `TransportParams.vad_enabled` parameter is now deprecated, use
|
||||
`TransportParams.audio_in_enabled` and `TransportParams.vad_analyzer` instead.
|
||||
|
||||
- `TransportParams.vad_audio_passthrough` parameter is now deprecated, use
|
||||
`TransportParams.audio_in_passthrough` instead.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue with `GeminiMultimodalLiveLLMService` where the context
|
||||
contained tokens instead of words.
|
||||
|
||||
- Fixed an issue with HTTP Smart Turn handling, where the service returns a 500
|
||||
error. Previously, this would cause an unhandled exception. Now, a 500 error
|
||||
is treated as an incomplete response.
|
||||
|
||||
- Fixed a TTS services issue that could cause assistant output not to be
|
||||
aggregated to the context when also using `TTSSpeakFrame`s.
|
||||
|
||||
- Fixed an issue where the `SmartTurnMetricsData` was reporting 0ms for
|
||||
inference and processing time when using the `FalSmartTurnAnalyzer`.
|
||||
|
||||
### Other
|
||||
|
||||
- Added 04 foundational examples for client/server transports. Also, renamed
|
||||
`29-livekit-audio-chat.py` to `04b-transports-livekit.py`.
|
||||
|
||||
- Added foundational example `13c-gladia-translation.py` showing how to use
|
||||
`TranscriptionFrame` and `TranslationFrame`.
|
||||
|
||||
## [0.0.65] - 2025-04-23 "Sant Jordi's release" 🌹📕
|
||||
|
||||
https://en.wikipedia.org/wiki/Saint_George%27s_Day_in_Catalonia
|
||||
|
||||
### Added
|
||||
|
||||
- Added automatic hangup logic to the Telnyx serializer. This feature hangs up
|
||||
the Telnyx call when an `EndFrame` or `CancelFrame` is received. It is
|
||||
enabled by default and is configurable via the `auto_hang_up` `InputParam`.
|
||||
|
||||
- Added a keepalive task to `GladiaSTTService` to prevent the websocket from
|
||||
disconnecting after 30 seconds of no audio input.
|
||||
|
||||
### Changed
|
||||
|
||||
- The `InputParams` for `ElevenLabsTTSService` and `ElevenLabsHttpTTSService`
|
||||
no longer require that `stability` and `similarity_boost` be set. You can
|
||||
individually set each param.
|
||||
|
||||
- In `TwilioFrameSerializer`, `call_sid` is Optional so as to avoid a breaking
|
||||
change. `call_sid` is required to automatically hang up.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue where `TwilioFrameSerializer` would send two hang up commands:
|
||||
one for the `EndFrame` and one for the `CancelFrame`.
|
||||
|
||||
## [0.0.64] - 2025-04-22
|
||||
|
||||
### Added
|
||||
|
||||
- Added automatic hangup logic to the Twilio serializer. This feature hangs up
|
||||
the Twilio call when an `EndFrame` or `CancelFrame` is received. It is
|
||||
enabled by default and is configurable via the `auto_hang_up` `InputParam`.
|
||||
|
||||
- Added `SmartTurnMetricsData`, which contains end-of-turn prediction metrics,
|
||||
to the `MetricsFrame`. Using `MetricsFrame`, you can now retrieve prediction
|
||||
confidence scores and processing time metrics from the smart turn analyzers.
|
||||
|
||||
- Added support for Application Default Credentials in Google services,
|
||||
`GoogleSTTService`, `GoogleTTSService`, and `GoogleVertexLLMService`.
|
||||
|
||||
- Added support for Smart Turn Detection via the `turn_analyzer` transport
|
||||
parameter. You can now choose between `HttpSmartTurnAnalyzer()` or
|
||||
`FalSmartTurnAnalyzer()` for remote inference or
|
||||
`LocalCoreMLSmartTurnAnalyzer()` for on-device inference using Core ML.
|
||||
|
||||
- `DeepgramTTSService` accepts `base_url` argument again, allowing you to
|
||||
connect to an on-prem service.
|
||||
|
||||
- Added `LLMUserAggregatorParams` and `LLMAssistantAggregatorParams` which allow
|
||||
you to control aggregator settings. You can now pass these arguments when
|
||||
creating aggregator pairs with `create_context_aggregator()`.
|
||||
|
||||
- Added `previous_text` context support to ElevenLabsHttpTTSService, improving
|
||||
speech consistency across sentences within an LLM response.
|
||||
|
||||
- Added word/timestamp pairs to `ElevenLabsHttpTTSService`.
|
||||
|
||||
- It is now possible to disable `SoundfileMixer` when created. You can then use
|
||||
`MixerEnableFrame` to dynamically enable it when necessary.
|
||||
|
||||
- Added `on_client_connected` and `on_client_disconnected` event handlers to
|
||||
the `DailyTransport` class. These handlers map to the same underlying Daily
|
||||
events as `on_participant_joined` and `on_participant_left`, respectively.
|
||||
This makes it easier to write a single bot pipeline that can also use other
|
||||
transports like `SmallWebRTCTransport` and `FastAPIWebsocketTransport`.
|
||||
|
||||
### Changed
|
||||
|
||||
- `GrokLLMService` now uses `grok-3-beta` as its default model.
|
||||
|
||||
- Daily's REST helpers now include an `eject_at_token_exp` param, which ejects
|
||||
the user when their token expires. This new parameter defaults to False.
|
||||
Also, the default value for `enable_prejoin_ui` changed to False and
|
||||
`eject_at_room_exp` changed to False.
|
||||
|
||||
- `OpenAILLMService` and `OpenPipeLLMService` now use `gpt-4.1` as their
|
||||
default model.
|
||||
|
||||
- `SoundfileMixer` constructor arguments need to be keywords.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `DeepgramSTTService` parameter `url` is now deprecated, use `base_url`
|
||||
instead.
|
||||
|
||||
### Removed
|
||||
|
||||
- Parameters `user_kwargs` and `assistant_kwargs` when creating a context
|
||||
aggregator pair using `create_context_aggregator()` have been removed. Use
|
||||
`user_params` and `assistant_params` instead.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue that would cause TTS websocket-based services to not cleanup
|
||||
resources properly when disconnecting.
|
||||
|
||||
- Fixed a `TavusVideoService` issue that was causing audio choppiness.
|
||||
|
||||
- Fixed an issue in `SmallWebRTCTransport` where an error was thrown if the
|
||||
client did not create a video transceiver.
|
||||
|
||||
- Fixed an issue where LLM input parameters were not being applied
|
||||
correctly in `GoogleVertexLLMService`, causing unexpected behavior during
|
||||
inference.
|
||||
|
||||
### Other
|
||||
|
||||
- Updated the `twilio-chatbot` example to use the auto-hangup feature.
|
||||
|
||||
## [0.0.63] - 2025-04-11
|
||||
|
||||
### Added
|
||||
|
||||
- Added media resolution control to `GeminiMultimodalLiveLLMService` with
|
||||
`GeminiMediaResolution` enum, allowing configuration of token usage for
|
||||
image processing (LOW: 64 tokens, MEDIUM: 256 tokens, HIGH: zoomed reframing
|
||||
with 256 tokens).
|
||||
|
||||
- Added Gemini's Voice Activity Detection (VAD) configuration to
|
||||
`GeminiMultimodalLiveLLMService` with `GeminiVADParams`, allowing fine
|
||||
control over speech detection sensitivity and timing, including:
|
||||
|
||||
- Start sensitivity (how quickly speech is detected)
|
||||
- End sensitivity (how quickly turns end after pauses)
|
||||
- Prefix padding (milliseconds of audio to keep before speech is detected)
|
||||
- Silence duration (milliseconds of silence required to end a turn)
|
||||
|
||||
- Added comprehensive language support to `GeminiMultimodalLiveLLMService`,
|
||||
supporting over 30 languages via the `language` parameter, with proper
|
||||
mapping between Pipecat's `Language` enum and Gemini's language codes.
|
||||
|
||||
- Added support in `SmallWebRTCTransport` to detect when remote tracks are
|
||||
muted.
|
||||
|
||||
- Added support for image capture from a video stream to the
|
||||
`SmallWebRTCTransport`.
|
||||
|
||||
- Added a new iOS client option to the `SmallWebRTCTransport`
|
||||
**video-transform** example.
|
||||
|
||||
- Added new processors `ProducerProcessor` and `ConsumerProcessor`. The
|
||||
producer processor processes frames from the pipeline and decides whether the
|
||||
consumers should consume it or not. If so, the same frame that is received by
|
||||
the producer is sent to the consumer. There can be multiple consumers per
|
||||
producer. These processors can be useful to push frames from one part of a
|
||||
pipeline to a different one (e.g. when using `ParallelPipeline`).
|
||||
|
||||
- Improvements for the `SmallWebRTCTransport`:
|
||||
- Wait until the pipeline is ready before triggering the `connected` event.
|
||||
- Queue messages if the data channel is not ready.
|
||||
- Update the aiortc dependency to fix an issue where the 'video/rtx' MIME
|
||||
type was incorrectly handled as a codec retransmission.
|
||||
- Avoid initial video delays.
|
||||
|
||||
### Changed
|
||||
|
||||
- In `GeminiMultimodalLiveLLMService`, removed the `transcribe_model_audio`
|
||||
parameter in favor of Gemini Live's native output transcription support. Now
|
||||
text transcriptions are produced directly by the model. No configuration is
|
||||
required.
|
||||
|
||||
- Updated `GeminiMultimodalLiveLLMService`’s default `model` to
|
||||
`models/gemini-2.0-flash-live-001` and `base_url` to the `v1beta` websocket
|
||||
URL.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Updated `daily-python` to 0.17.0 to fix an issue that prevented it from running on
|
||||
older platforms.
|
||||
|
||||
- Fixed an issue where `CartesiaTTSService`'s spell feature would result in
|
||||
the spelled word in the context appearing as "F,O,O,B,A,R" instead of
|
||||
"FOOBAR".
|
||||
|
||||
- Fixed an issue in the Azure TTS services where the language was being set
|
||||
incorrectly.
|
||||
|
||||
- Fixed `SmallWebRTCTransport` to support dynamic values for
|
||||
`TransportParams.audio_out_10ms_chunks`. Previously, it only worked with 20ms
|
||||
chunks.
|
||||
|
||||
- Fixed an issue with `GeminiMultimodalLiveLLMService` where the assistant
|
||||
context messages had no space between words.
|
||||
|
||||
- Fixed an issue where `LLMAssistantContextAggregator` would prevent a
|
||||
`BotStoppedSpeakingFrame` from moving through the pipeline.
|
||||
|
||||
## [0.0.62] - 2025-04-01 "An April Fools' release"
|
||||
|
||||
### Added
|
||||
|
||||
- Added `TransportParams.audio_out_10ms_chunks` parameter to allow controlling
|
||||
the amount of audio being sent by the output transport. It defaults to 4, so
|
||||
40ms audio chunks are sent.
|
||||
|
||||
- Added `QwenLLMService` for Qwen integration with an OpenAI-compatible
|
||||
interface. Added foundational example `14q-function-calling-qwen.py`.
|
||||
|
||||
@@ -53,6 +349,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
### Changed
|
||||
|
||||
- `FunctionCallResultFrame`s are now system frames. This is to prevent function
|
||||
call results from being discarded during interruptions.
|
||||
|
||||
- Pipecat services have been reorganized into packages. Each package can have
|
||||
one or more of the following modules (in the future new module names might be
|
||||
needed) depending on the services implemented:
|
||||
@@ -69,15 +368,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
be found in
|
||||
`pipecat.services.[ai_service,image_service,llm_service,stt_service,vision_service]`.
|
||||
|
||||
- `GladiaSTTService` now uses Gladia's default values.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue that would cause `SegmentedSTTService` based services
|
||||
(e.g. `OpenAISTTService`) to try to transcribe non-spoken audio, causing
|
||||
invalid transcriptions.
|
||||
|
||||
- Fixed an issue where `GoogleTTSService` was emitting two `TTSStoppedFrame`s.
|
||||
- `GladiaSTTService` now uses the `solaria-1` model by default. Other params
|
||||
use Gladia's default values. Added support for more language codes.
|
||||
|
||||
### Deprecated
|
||||
|
||||
@@ -96,6 +388,31 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- Deprecated using `GladiaSTTService.InputParams` directly. Use the new
|
||||
`GladiaInputParams` class instead.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed a `FastAPIWebsocketTransport` and `WebsocketClientTransport` issue that
|
||||
would cause the transport to be closed prematurely, preventing the internally
|
||||
queued audio from being sent. The same issue could also cause an infinite loop
|
||||
while using an output mixer and when sending an `EndFrame`, preventing the bot
|
||||
from finishing.
|
||||
|
||||
- Fixed an issue that could cause the `TranscriptionUpdateFrame` being pushed
|
||||
because of an interruption to be discarded.
|
||||
|
||||
- Fixed an issue that would cause `SegmentedSTTService` based services
|
||||
(e.g. `OpenAISTTService`) to try to transcribe non-spoken audio, causing
|
||||
invalid transcriptions.
|
||||
|
||||
- Fixed an issue where `GoogleTTSService` was emitting two `TTSStoppedFrame`s.
|
||||
|
||||
### Performance
|
||||
|
||||
- Output transports now send 40ms audio chunks instead of 20ms. This should
|
||||
improve performance.
|
||||
|
||||
- `BotSpeakingFrame`s are now sent every 200ms. If the output transport audio chunks
|
||||
are longer than 200ms then they will be sent at every audio chunk.
|
||||
|
||||
### Other
|
||||
|
||||
- Added foundational example `37-mem0.py` demonstrating how to use the
|
||||
|
||||
@@ -26,11 +26,52 @@ git commit -m "Description of your changes"
|
||||
git push origin your-branch-name
|
||||
```
|
||||
|
||||
9. **Submit a Pull Request (PR)**: Open a PR from your forked repository to the main branch of this repo.
|
||||
> Important: Describe the changes you've made clearly!
|
||||
8. **Submit a Pull Request (PR)**: Open a PR from your forked repository to the main branch of this repo.
|
||||
> Important: Describe the changes you've made clearly!
|
||||
|
||||
Our maintainers will review your PR, and once everything is good, your contributions will be merged!
|
||||
|
||||
## Code Style and Documentation
|
||||
|
||||
### Python Code Style
|
||||
|
||||
We use Ruff for code linting and formatting. Please ensure your code passes all linting checks before submitting a PR.
|
||||
|
||||
### Docstring Conventions
|
||||
|
||||
We follow Google-style docstrings with these specific conventions:
|
||||
|
||||
- Class docstrings should fully document all parameters used in `__init__`
|
||||
- We don't require separate docstrings for `__init__` methods when parameters are documented in the class docstring
|
||||
- Property methods should have docstrings explaining their purpose and return value
|
||||
|
||||
Example of correctly documented class:
|
||||
|
||||
```python
|
||||
class MyClass:
|
||||
"""Class description.
|
||||
|
||||
Additional details about the class.
|
||||
|
||||
Args:
|
||||
param1: Description of first parameter.
|
||||
param2: Description of second parameter.
|
||||
"""
|
||||
|
||||
def __init__(self, param1, param2):
|
||||
# No docstring required here as parameters are documented above
|
||||
self.param1 = param1
|
||||
self.param2 = param2
|
||||
|
||||
@property
|
||||
def some_property(self) -> str:
|
||||
"""Get the formatted property value.
|
||||
|
||||
Returns:
|
||||
A string representation of the property.
|
||||
"""
|
||||
return f"Property: {self.param1}"
|
||||
```
|
||||
|
||||
# Contributor Covenant Code of Conduct
|
||||
|
||||
@@ -51,23 +92,23 @@ diverse, inclusive, and healthy community.
|
||||
Examples of behavior that contributes to a positive environment for our
|
||||
community include:
|
||||
|
||||
* Demonstrating empathy and kindness toward other people
|
||||
* Being respectful of differing opinions, viewpoints, and experiences
|
||||
* Giving and gracefully accepting constructive feedback
|
||||
* Accepting responsibility and apologizing to those affected by our mistakes,
|
||||
- Demonstrating empathy and kindness toward other people
|
||||
- Being respectful of differing opinions, viewpoints, and experiences
|
||||
- Giving and gracefully accepting constructive feedback
|
||||
- Accepting responsibility and apologizing to those affected by our mistakes,
|
||||
and learning from the experience
|
||||
* Focusing on what is best not just for us as individuals, but for the overall
|
||||
- Focusing on what is best not just for us as individuals, but for the overall
|
||||
community
|
||||
|
||||
Examples of unacceptable behavior include:
|
||||
|
||||
* The use of sexualized language or imagery, and sexual attention or advances of
|
||||
- The use of sexualized language or imagery, and sexual attention or advances of
|
||||
any kind
|
||||
* Trolling, insulting or derogatory comments, and personal or political attacks
|
||||
* Public or private harassment
|
||||
* Publishing others' private information, such as a physical or email address,
|
||||
- Trolling, insulting or derogatory comments, and personal or political attacks
|
||||
- Public or private harassment
|
||||
- Publishing others' private information, such as a physical or email address,
|
||||
without their explicit permission
|
||||
* Other conduct which could reasonably be considered inappropriate in a
|
||||
- Other conduct which could reasonably be considered inappropriate in a
|
||||
professional setting
|
||||
|
||||
## Enforcement Responsibilities
|
||||
@@ -162,4 +203,4 @@ For answers to common questions about this code of conduct, see the FAQ at
|
||||
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
|
||||
[Mozilla CoC]: https://github.com/mozilla/diversity
|
||||
[FAQ]: https://www.contributor-covenant.org/faq
|
||||
[translations]: https://www.contributor-covenant.org/translations
|
||||
[translations]: https://www.contributor-covenant.org/translations
|
||||
|
||||
239
README.md
@@ -1,43 +1,72 @@
|
||||
<h1><div align="center">
|
||||
<img alt="pipecat" width="300px" height="auto" src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/pipecat.png">
|
||||
<img alt="pipecat" width="300px" height="auto" src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/pipecat.png">
|
||||
</div></h1>
|
||||
|
||||
[](https://pypi.org/project/pipecat-ai)  [](https://codecov.io/gh/pipecat-ai/pipecat) [](https://docs.pipecat.ai) [](https://discord.gg/pipecat)
|
||||
|
||||
Pipecat is an open source Python framework for building voice and multimodal conversational agents. It handles the complex orchestration of AI services, network transport, audio processing, and multimodal interactions, letting you focus on creating engaging experiences.
|
||||
# 🎙️ Pipecat: Real-Time Voice & Multimodal AI Agents
|
||||
|
||||
## What you can build
|
||||
**Pipecat** is an open-source Python framework for building real-time voice and multimodal conversational agents. Orchestrate audio and video, AI services, different transports, and conversation pipelines effortlessly—so you can focus on what makes your agent unique.
|
||||
|
||||
- **Voice Assistants**: [Natural, real-time conversations with AI](https://demo.dailybots.ai/)
|
||||
- **Interactive Agents**: Personal coaches and meeting assistants
|
||||
- **Multimodal Apps**: Combine voice, video, images, and text
|
||||
- **Creative Tools**: [Story-telling experiences](https://storytelling-chatbot.fly.dev/) and social companions
|
||||
- **Business Solutions**: [Customer intake flows](https://www.youtube.com/watch?v=lDevgsp9vn0) and support bots
|
||||
- **Complex conversational flows**: [Refer to Pipecat Flows](https://github.com/pipecat-ai/pipecat-flows) to learn more
|
||||
## 🚀 What You Can Build
|
||||
|
||||
## See it in action
|
||||
- **Voice Assistants** – natural, streaming conversations with AI
|
||||
- **AI Companions** – coaches, meeting assistants, characters
|
||||
- **Multimodal Interfaces** – voice, video, images, and more
|
||||
- **Interactive Storytelling** – creative tools with generative media
|
||||
- **Business Agents** – customer intake, support bots, guided flows
|
||||
- **Complex Dialog Systems** – design logic with structured conversations
|
||||
|
||||
🧭 Looking to build structured conversations? Check out [Pipecat Flows](https://github.com/pipecat-ai/pipecat-flows) for managing complex conversational states and transitions.
|
||||
|
||||
## 🧠 Why Pipecat?
|
||||
|
||||
- **Voice-first**: Integrates speech recognition, text-to-speech, and conversation handling
|
||||
- **Pluggable**: Supports many AI services and tools
|
||||
- **Composable Pipelines**: Build complex behavior from modular components
|
||||
- **Real-Time**: Ultra-low latency interaction with different transports (e.g. WebSockets or WebRTC)
|
||||
|
||||
## 🎬 See it in action
|
||||
|
||||
<p float="left">
|
||||
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/simple-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/simple-chatbot/image.png" width="280" /></a>
|
||||
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/storytelling-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/storytelling-chatbot/image.png" width="280" /></a>
|
||||
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/simple-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/simple-chatbot/image.png" width="400" /></a>
|
||||
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/storytelling-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/storytelling-chatbot/image.png" width="400" /></a>
|
||||
<br/>
|
||||
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/translation-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/translation-chatbot/image.png" width="280" /></a>
|
||||
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/moondream-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/moondream-chatbot/image.png" width="280" /></a>
|
||||
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/translation-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/translation-chatbot/image.png" width="400" /></a>
|
||||
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/moondream-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/moondream-chatbot/image.png" width="400" /></a>
|
||||
</p>
|
||||
|
||||
## Key features
|
||||
## 📱 Client SDKs
|
||||
|
||||
- **Voice-first Design**: Built-in speech recognition, TTS, and conversation handling
|
||||
- **Flexible Integration**: Works with popular AI services (OpenAI, ElevenLabs, etc.)
|
||||
- **Pipeline Architecture**: Build complex apps from simple, reusable components
|
||||
- **Real-time Processing**: Frame-based pipeline architecture for fluid interactions
|
||||
- **Production Ready**: Enterprise-grade WebRTC and Websocket support
|
||||
You can connect to Pipecat from any platform using our official SDKs:
|
||||
|
||||
💡 Looking to build structured conversations? Check out [Pipecat Flows](https://github.com/pipecat-ai/pipecat-flows) for managing complex conversational states and transitions.
|
||||
| Platform | SDK Repo | Description |
|
||||
| -------- | ------------------------------------------------------------------------------ | -------------------------------- |
|
||||
| Web | [pipecat-client-web](https://github.com/pipecat-ai/pipecat-client-web) | JavaScript and React client SDKs |
|
||||
| iOS | [pipecat-client-ios](https://github.com/pipecat-ai/pipecat-client-ios) | Swift SDK for iOS |
|
||||
| Android | [pipecat-client-android](https://github.com/pipecat-ai/pipecat-client-android) | Kotlin SDK for Android |
|
||||
| C++ | [pipecat-client-cxx](https://github.com/pipecat-ai/pipecat-client-cxx) | C++ client SDK |
|
||||
|
||||
## Getting started
|
||||
## 🧩 Available services
|
||||
|
||||
You can get started with Pipecat running on your local machine, then move your agent processes to the cloud when you’re ready. You can also add a 📞 telephone number, 🖼️ image output, 📺 video input, use different LLMs, and more.
|
||||
| Category | Services |
|
||||
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Parakeet (NVIDIA)](https://docs.pipecat.ai/server/services/stt/parakeet), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [FastPitch (NVIDIA)](https://docs.pipecat.ai/server/services/tts/fastpitch), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
|
||||
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) |
|
||||
| Analytics & Metrics | [Canonical AI](https://docs.pipecat.ai/server/services/analytics/canonical), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
||||
|
||||
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
|
||||
|
||||
## ⚡ Getting started
|
||||
|
||||
You can get started with Pipecat running on your local machine, then move your agent processes to the cloud when you’re ready.
|
||||
|
||||
```shell
|
||||
# Install the module
|
||||
@@ -53,155 +82,71 @@ To keep things lightweight, only the core framework is included by default. If y
|
||||
pip install "pipecat-ai[option,...]"
|
||||
```
|
||||
|
||||
### Available services
|
||||
|
||||
| Category | Services | Install Command Example |
|
||||
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Parakeet (NVIDIA)](https://docs.pipecat.ai/server/services/stt/parakeet), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) | `pip install "pipecat-ai[deepgram]"` |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [Together AI](https://docs.pipecat.ai/server/services/llm/together) | `pip install "pipecat-ai[openai]"` |
|
||||
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [FastPitch (NVIDIA)](https://docs.pipecat.ai/server/services/tts/fastpitch), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) | `pip install "pipecat-ai[cartesia]"` |
|
||||
| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) | `pip install "pipecat-ai[google]"` |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local | `pip install "pipecat-ai[daily]"` |
|
||||
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) | `pip install "pipecat-ai[tavus,simli]"` |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) | `pip install "pipecat-ai[mem0]"` |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) | `pip install "pipecat-ai[moondream]"` |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) | `pip install "pipecat-ai[silero]"` |
|
||||
| Analytics & Metrics | [Canonical AI](https://docs.pipecat.ai/server/services/analytics/canonical), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) | `pip install "pipecat-ai[canonical]"` |
|
||||
|
||||
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
|
||||
|
||||
## Code examples
|
||||
## 🧪 Code examples
|
||||
|
||||
- [Foundational](https://github.com/pipecat-ai/pipecat/tree/main/examples/foundational) — small snippets that build on each other, introducing one or two concepts at a time
|
||||
- [Example apps](https://github.com/pipecat-ai/pipecat/tree/main/examples/) — complete applications that you can use as starting points for development
|
||||
|
||||
## A simple voice agent running locally
|
||||
## 🛠️ Hacking on the framework itself
|
||||
|
||||
Here is a very basic Pipecat bot that greets a user when they join a real-time session. We'll use [Daily](https://daily.co) for real-time media transport, and [Cartesia](https://cartesia.ai/) for text-to-speech.
|
||||
1. Set up a virtual environment before following these instructions. From the root of the repo:
|
||||
|
||||
```python
|
||||
import asyncio
|
||||
```shell
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate
|
||||
```
|
||||
|
||||
from pipecat.frames.frames import TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
2. Install the development dependencies:
|
||||
|
||||
async def main():
|
||||
# Use Daily as a real-time media transport (WebRTC)
|
||||
transport = DailyTransport(
|
||||
room_url=...,
|
||||
token="", # leave empty. Note: token is _not_ your api key
|
||||
bot_name="Bot Name",
|
||||
params=DailyParams(audio_out_enabled=True))
|
||||
```shell
|
||||
pip install -r dev-requirements.txt
|
||||
```
|
||||
|
||||
# Use Cartesia for Text-to-Speech
|
||||
tts = CartesiaTTSService(
|
||||
api_key=...,
|
||||
voice_id=...
|
||||
)
|
||||
3. Install the git pre-commit hooks (these help ensure your code follows project rules):
|
||||
|
||||
# Simple pipeline that will process text to speech and output the result
|
||||
pipeline = Pipeline([tts, transport.output()])
|
||||
```shell
|
||||
pre-commit install
|
||||
```
|
||||
|
||||
# Create Pipecat processor that can run one or more pipeline tasks
|
||||
runner = PipelineRunner()
|
||||
4. Install the `pipecat-ai` package locally in editable mode:
|
||||
|
||||
# Assign the task callable to run the pipeline
|
||||
task = PipelineTask(pipeline)
|
||||
```shell
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
# Register an event handler to play audio when a
|
||||
# participant joins the transport WebRTC session
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
participant_name = participant.get("info", {}).get("userName", "")
|
||||
# Queue a TextFrame that will get spoken by the TTS service (Cartesia)
|
||||
await task.queue_frame(TextFrame(f"Hello there, {participant_name}!"))
|
||||
> The `-e` or `--editable` option allows you to modify the code without reinstalling.
|
||||
|
||||
# Register an event handler to exit the application when the user leaves.
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.cancel()
|
||||
5. Include optional dependencies as needed. For example:
|
||||
|
||||
# Run the pipeline task
|
||||
await runner.run(task)
|
||||
```shell
|
||||
pip install -e ".[daily,deepgram,cartesia,openai,silero]"
|
||||
```
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
```
|
||||
6. (Optional) If you want to use this package from another directory:
|
||||
|
||||
Run it with:
|
||||
|
||||
```shell
|
||||
python app.py
|
||||
```
|
||||
|
||||
Daily provides a prebuilt WebRTC user interface. While the app is running, you can visit `https://<yourdomain>.daily.co/<room_url>` and listen to the bot say hello!
|
||||
|
||||
## WebRTC for production use
|
||||
|
||||
WebSockets are fine for server-to-server communication or for initial development. But for production use, you’ll need client-server audio to use a protocol designed for real-time media transport. (For an explanation of the difference between WebSockets and WebRTC, see [this post.](https://www.daily.co/blog/how-to-talk-to-an-llm-with-your-voice/#webrtc))
|
||||
|
||||
One way to get up and running quickly with WebRTC is to sign up for a Daily developer account. Daily gives you SDKs and global infrastructure for audio (and video) routing. Every account gets 10,000 audio/video/transcription minutes free each month.
|
||||
|
||||
Sign up [here](https://dashboard.daily.co/u/signup) and [create a room](https://docs.daily.co/reference/rest-api/rooms) in the developer Dashboard.
|
||||
|
||||
## Hacking on the framework itself
|
||||
|
||||
_Note: You may need to set up a virtual environment before following these instructions. From the root of the repo:_
|
||||
|
||||
```shell
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate
|
||||
```
|
||||
|
||||
Install the development dependencies:
|
||||
|
||||
```shell
|
||||
pip install -r dev-requirements.txt
|
||||
```
|
||||
|
||||
Install the git pre-commit hooks (these help ensure your code follows project rules):
|
||||
|
||||
```shell
|
||||
pre-commit install
|
||||
```
|
||||
|
||||
Install the `pipecat-ai` package locally in editable mode:
|
||||
|
||||
```shell
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
The `-e` or `--editable` option allows you to modify the code without reinstalling.
|
||||
|
||||
To include optional dependencies, add them to the install command. For example:
|
||||
|
||||
```shell
|
||||
pip install -e ".[daily,deepgram,cartesia,openai,silero]" # Updated for the services you're using
|
||||
```
|
||||
|
||||
If you want to use this package from another directory:
|
||||
|
||||
```shell
|
||||
pip install "path_to_this_repo[option,...]"
|
||||
```
|
||||
```shell
|
||||
pip install "path_to_this_repo[option,...]"
|
||||
```
|
||||
|
||||
### Running tests
|
||||
|
||||
Install the test dependencies:
|
||||
|
||||
```shell
|
||||
pip install -r test-requirements.txt
|
||||
```
|
||||
|
||||
From the root directory, run:
|
||||
|
||||
```shell
|
||||
pytest
|
||||
```
|
||||
|
||||
## Setting up your editor
|
||||
### Setting up your editor
|
||||
|
||||
This project uses strict [PEP 8](https://peps.python.org/pep-0008/) formatting via [Ruff](https://github.com/astral-sh/ruff).
|
||||
|
||||
### Emacs
|
||||
#### Emacs
|
||||
|
||||
You can use [use-package](https://github.com/jwiegley/use-package) to install [emacs-lazy-ruff](https://github.com/christophermadsen/emacs-lazy-ruff) package and configure `ruff` arguments:
|
||||
|
||||
@@ -223,7 +168,7 @@ You can use [use-package](https://github.com/jwiegley/use-package) to install [e
|
||||
:hook ((python-mode . pyvenv-auto-run)))
|
||||
```
|
||||
|
||||
### Visual Studio Code
|
||||
#### Visual Studio Code
|
||||
|
||||
Install the
|
||||
[Ruff](https://marketplace.visualstudio.com/items?itemName=charliermarsh.ruff) extension. Then edit the user settings (_Ctrl-Shift-P_ `Open User Settings (JSON)`) and set it as the default Python formatter, and enable formatting on save:
|
||||
@@ -235,7 +180,7 @@ Install the
|
||||
}
|
||||
```
|
||||
|
||||
### PyCharm
|
||||
#### PyCharm
|
||||
|
||||
`ruff` was installed in the `venv` environment described before, now to enable autoformatting on save, go to `File` -> `Settings` -> `Tools` -> `File Watchers` and add a new watcher with the following settings:
|
||||
|
||||
@@ -245,7 +190,7 @@ Install the
|
||||
4. **Arguments**: `format $FilePath$`
|
||||
5. **Program**: `$PyInterpreterDirectory$/ruff`
|
||||
|
||||
## Contributing
|
||||
## 🤝 Contributing
|
||||
|
||||
We welcome contributions from the community! Whether you're fixing bugs, improving documentation, or adding new features, here's how you can help:
|
||||
|
||||
@@ -258,7 +203,7 @@ Before submitting a pull request, please check existing issues and PRs to avoid
|
||||
|
||||
We aim to review all contributions promptly and provide constructive feedback to help get your changes merged.
|
||||
|
||||
## Getting help
|
||||
## 🛟 Getting help
|
||||
|
||||
➡️ [Join our Discord](https://discord.gg/pipecat)
|
||||
|
||||
|
||||
@@ -1,22 +0,0 @@
|
||||
# Description
|
||||
Is this reporting a bug or feature request?
|
||||
|
||||
|
||||
If reporting a bug, please fill out the following:
|
||||
|
||||
### Environment
|
||||
- pipecat-ai version:
|
||||
- python version:
|
||||
- OS:
|
||||
|
||||
### Issue description
|
||||
Provide a clear description of the issue.
|
||||
|
||||
### Repro steps
|
||||
List the steps to reproduce the issue.
|
||||
|
||||
### Expected behavior
|
||||
|
||||
### Actual behavior
|
||||
|
||||
### Logs
|
||||
@@ -50,7 +50,6 @@ autodoc_mock_imports = [
|
||||
"pyht.protos",
|
||||
"pyht.protos.api_pb2",
|
||||
"pipecat_ai_playht", # PlayHT wrapper
|
||||
"vllm",
|
||||
"aiortc",
|
||||
"aiortc.mediastreams",
|
||||
"cv2",
|
||||
@@ -76,7 +75,6 @@ autodoc_mock_imports = [
|
||||
"openpipe",
|
||||
"simli",
|
||||
"soundfile",
|
||||
# Existing mocks
|
||||
"pipecat_ai_krisp",
|
||||
"pyaudio",
|
||||
"_tkinter",
|
||||
@@ -87,6 +85,66 @@ autodoc_mock_imports = [
|
||||
"pydantic.Field",
|
||||
"pydantic._internal._model_construction",
|
||||
"pydantic._internal._fields",
|
||||
# Moondream dependencies
|
||||
"torch",
|
||||
"transformers",
|
||||
"intel_extension_for_pytorch",
|
||||
# Ultravox dependencies
|
||||
"huggingface_hub",
|
||||
"vllm",
|
||||
"vllm.engine.arg_utils",
|
||||
"transformers.AutoTokenizer",
|
||||
# Langchain dependencies
|
||||
"langchain_core",
|
||||
"langchain_core.messages",
|
||||
"langchain_core.runnables",
|
||||
"langchain_core.messages.AIMessageChunk",
|
||||
"langchain_core.runnables.Runnable",
|
||||
# LiveKit dependencies
|
||||
"livekit",
|
||||
"livekit.rtc",
|
||||
"livekit_api",
|
||||
"livekit_protocol",
|
||||
"tenacity",
|
||||
"tenacity.retry",
|
||||
"tenacity.stop_after_attempt",
|
||||
"tenacity.wait_exponential",
|
||||
"rtc",
|
||||
"rtc.Room",
|
||||
"rtc.RoomOptions",
|
||||
"rtc.AudioSource",
|
||||
"rtc.LocalAudioTrack",
|
||||
"rtc.TrackPublishOptions",
|
||||
"rtc.TrackSource",
|
||||
"rtc.AudioStream",
|
||||
"rtc.AudioFrameEvent",
|
||||
"rtc.AudioFrame",
|
||||
"rtc.Track",
|
||||
"rtc.TrackKind",
|
||||
"rtc.RemoteParticipant",
|
||||
"rtc.RemoteTrackPublication",
|
||||
"rtc.DataPacket",
|
||||
# Riva dependencies
|
||||
"riva",
|
||||
"riva.client",
|
||||
"riva.client.Auth",
|
||||
"riva.client.ASRService",
|
||||
"riva.client.StreamingRecognitionConfig",
|
||||
"riva.client.RecognitionConfig",
|
||||
"riva.client.AudioEncoding",
|
||||
"riva.client.proto.riva_tts_pb2",
|
||||
"riva.client.SpeechSynthesisService",
|
||||
# Local CoreML Smart Turn dependencies
|
||||
"coremltools",
|
||||
"coremltools.models",
|
||||
"coremltools.models.MLModel",
|
||||
"torch",
|
||||
"torch.nn",
|
||||
"torch.nn.functional",
|
||||
"transformers",
|
||||
"transformers.AutoFeatureExtractor",
|
||||
# Also add specific classes that are imported
|
||||
"AutoFeatureExtractor",
|
||||
]
|
||||
|
||||
# HTML output settings
|
||||
@@ -118,12 +176,25 @@ def verify_modules():
|
||||
},
|
||||
}
|
||||
|
||||
# Skip importing modules that are in autodoc_mock_imports
|
||||
skipped_modules = set(autodoc_mock_imports)
|
||||
|
||||
missing = []
|
||||
for category, modules in required_modules.items():
|
||||
if isinstance(modules, dict):
|
||||
# Handle nested structure
|
||||
for subcategory, submodules in modules.items():
|
||||
for module in submodules:
|
||||
# Check if module is in autodoc_mock_imports
|
||||
if (
|
||||
f"pipecat.{category}.{subcategory}.{module}" in skipped_modules
|
||||
or module in skipped_modules
|
||||
):
|
||||
logger.info(
|
||||
f"Skipping import of mocked module: pipecat.{category}.{subcategory}.{module}"
|
||||
)
|
||||
continue
|
||||
|
||||
try:
|
||||
__import__(f"pipecat.{category}.{subcategory}.{module}")
|
||||
logger.info(
|
||||
@@ -137,6 +208,11 @@ def verify_modules():
|
||||
else:
|
||||
# Handle flat structure
|
||||
for module in modules:
|
||||
# Check if module is in autodoc_mock_imports
|
||||
if f"pipecat.{category}.{module}" in skipped_modules or module in skipped_modules:
|
||||
logger.info(f"Skipping import of mocked module: pipecat.{category}.{module}")
|
||||
continue
|
||||
|
||||
try:
|
||||
__import__(f"pipecat.{category}.{module}")
|
||||
logger.info(f"Successfully imported pipecat.{category}.{module}")
|
||||
|
||||
@@ -45,8 +45,10 @@ Transport & Serialization
|
||||
Utilities
|
||||
~~~~~~~~~
|
||||
|
||||
* :mod:`Adapters <pipecat.adapters>`
|
||||
* :mod:`Clocks <pipecat.clocks>`
|
||||
* :mod:`Metrics <pipecat.metrics>`
|
||||
* :mod:`Observers <pipecat.observers>`
|
||||
* :mod:`Sync <pipecat.sync>`
|
||||
* :mod:`Transcriptions <pipecat.transcriptions>`
|
||||
* :mod:`Utils <pipecat.utils>`
|
||||
@@ -56,10 +58,12 @@ Utilities
|
||||
:caption: API Reference
|
||||
:hidden:
|
||||
|
||||
Adapters <api/pipecat.adapters>
|
||||
Audio <api/pipecat.audio>
|
||||
Clocks <api/pipecat.clocks>
|
||||
Frames <api/pipecat.frames>
|
||||
Metrics <api/pipecat.metrics>
|
||||
Observers <api/pipecat.observers>
|
||||
Pipeline <api/pipecat.pipeline>
|
||||
Processors <api/pipecat.processors>
|
||||
Serializers <api/pipecat.serializers>
|
||||
|
||||
@@ -26,20 +26,23 @@ pipecat-ai[grok]
|
||||
pipecat-ai[groq]
|
||||
# pipecat-ai[krisp] # Mocked
|
||||
pipecat-ai[koala]
|
||||
pipecat-ai[langchain]
|
||||
pipecat-ai[livekit]
|
||||
# pipecat-ai[langchain] # Mocked
|
||||
# pipecat-ai[livekit] # Mocked
|
||||
pipecat-ai[lmnt]
|
||||
pipecat-ai[local]
|
||||
# pipecat-ai[local-smart-turn] # Mocked
|
||||
# pipecat-ai[mem0] # Mocked
|
||||
# pipecat-ai[mlx-whisper] # Mocked
|
||||
pipecat-ai[moondream]
|
||||
# pipecat-ai[moondream] # Mocked
|
||||
pipecat-ai[nim]
|
||||
# pipecat-ai[neuphonic] # Mocked
|
||||
pipecat-ai[noisereduce]
|
||||
pipecat-ai[openai]
|
||||
# pipecat-ai[openpipe]
|
||||
# pipecat-ai[playht] # Mocked due to grpcio conflict with riva
|
||||
pipecat-ai[riva]
|
||||
pipecat-ai[qwen]
|
||||
pipecat-ai[remote-smart-turn]
|
||||
# pipecat-ai[riva] # Mocked
|
||||
pipecat-ai[silero]
|
||||
pipecat-ai[simli]
|
||||
pipecat-ai[soundfile]
|
||||
|
||||
@@ -92,4 +92,12 @@ ASSEMBLYAI_API_KEY=...
|
||||
OPENROUTER_API_KEY=...
|
||||
|
||||
# Piper
|
||||
PIPER_BASE_URL=...
|
||||
PIPER_BASE_URL=...
|
||||
|
||||
# Smart turn
|
||||
LOCAL_SMART_TURN_MODEL_PATH=
|
||||
FAL_SMART_TURN_API_KEY=...
|
||||
|
||||
# Twilio
|
||||
TWILIO_ACCOUNT_SID=
|
||||
TWILIO_AUTH_TOKEN=
|
||||
@@ -43,9 +43,7 @@ async def main():
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_in_enabled=True,
|
||||
camera_out_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_audio_passthrough=True,
|
||||
video_out_enabled=False,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
#
|
||||
@@ -72,7 +70,7 @@ async def main():
|
||||
# voice_id="gD1IexrzCvsXPHUuT0s3",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
|
||||
@@ -66,9 +66,7 @@ async def main():
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_in_enabled=True,
|
||||
camera_out_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_audio_passthrough=True,
|
||||
video_out_enabled=False,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
#
|
||||
@@ -95,7 +93,7 @@ async def main():
|
||||
# voice_id="gD1IexrzCvsXPHUuT0s3",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
|
||||
@@ -41,8 +41,7 @@ async def main(room_url: str, token: str):
|
||||
api_key=daily_api_key,
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=False,
|
||||
vad_enabled=True,
|
||||
video_out_enabled=False,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
),
|
||||
@@ -53,7 +52,7 @@ async def main(room_url: str, token: str):
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
|
||||
@@ -32,9 +32,9 @@ async def main(room_url: str, token: str):
|
||||
token,
|
||||
"bot",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
@@ -43,7 +43,7 @@ async def main(room_url: str, token: str):
|
||||
api_key=os.getenv("CARTESIA_API_KEY", ""), voice_id="71a7ad14-091c-4e8e-a314-022ece01c121"
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
python-dotenv==1.0.1
|
||||
modal==0.71.3
|
||||
pipecat-ai[daily,silero,cartesia,openai]==0.0.52
|
||||
pipecat-ai[daily,silero,cartesia,openai]
|
||||
fastapi==0.115.6
|
||||
aiohttp==3.11.11
|
||||
|
||||
178
examples/deployment/pipecat-cloud-daily-pstn-server/README.md
Normal file
@@ -0,0 +1,178 @@
|
||||
# Handling PSTN/SIP Dial-in on Pipecat Cloud
|
||||
|
||||
This repository contains two server implementations for handling
|
||||
the pinless dial-in workflow in Pipecat Cloud. This is the companion to the
|
||||
Pipecat Cloud [pstn_sip starter image](https://github.com/daily-co/pipecat-cloud-images/tree/main/pipecat-starters/pstn_sip).
|
||||
In addition you can use `/api/dial` to trigger dial-out, and
|
||||
eventually, call-transfers.
|
||||
|
||||
1. [FastAPI Server](fastapi-webhook-server/README.md) -
|
||||
A FastAPI implementation that handles PSTN (Public Switched Telephone
|
||||
Network) and SIP (Session Initiation Protocol) calls using the Daily API.
|
||||
|
||||
2. [Next.js Serverless](nextjs-webhook-server/README.md) -
|
||||
A Next.js API implementation designed for deployment on Vercel's
|
||||
serverless platform.
|
||||
|
||||
Both implementations provide:
|
||||
|
||||
- HMAC signature validation for pinless webhook
|
||||
- Structured logging
|
||||
- Support for dial-in and dial-out settings
|
||||
- Voicemail detection and call transfer functionality (coming soon)
|
||||
- Test request handling
|
||||
|
||||
## Choosing an Implementation
|
||||
|
||||
- Use the **FastAPI Server** if you:
|
||||
|
||||
- Need a standalone server
|
||||
- Prefer Python and FastAPI
|
||||
- Want to deploy to traditional hosting platforms
|
||||
|
||||
- Use the **Next.js Serverless** implementation if you:
|
||||
- Want serverless deployment
|
||||
- Prefer JavaScript/TypeScript
|
||||
- Already use Next.js and Vercel for other projects
|
||||
- Need quick scaling and zero maintenance
|
||||
|
||||
## Prerequisites
|
||||
|
||||
### Environment Variables
|
||||
|
||||
Both implementations require similar environment variables:
|
||||
|
||||
- `PIPECAT_CLOUD_API_KEY`: Pipecat Cloud API Key, begins with pk\_\*
|
||||
- `AGENT_NAME`: Your Daily agent name
|
||||
- `PINLESS_HMAC_SECRET`: Your HMAC secret for request verification
|
||||
- `LOG_LEVEL`: (Optional) Logging level (defaults to 'info')
|
||||
|
||||
See the individual README files in each implementation directory for
|
||||
specific setup instructions.
|
||||
|
||||
### Phone number setup
|
||||
|
||||
You can buy a phone number through the Pipecat Cloud Dashboard:
|
||||
|
||||
1. Go to `Settings` > `Telephony`
|
||||
2. Follow the UI to purchase a phone number
|
||||
3. Configure the webhook URL to receive incoming calls (e.g. `https://my-webhook-url.com/api/dial`)
|
||||
|
||||
Or purchase the number using Daily's
|
||||
[PhoneNumbers API](https://docs.daily.co/reference/rest-api/phone-numbers).
|
||||
|
||||
```bash
|
||||
curl --request POST \
|
||||
--url https://api.daily.co/v1/domain-dialin-config \
|
||||
--header "Authorization: Bearer $TOKEN" \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data-raw '{
|
||||
"type": "pinless_dialin",
|
||||
"name_prefix": "Customer1",
|
||||
"phone_number": "+1PURCHASED_NUM",
|
||||
"room_creation_api": "https://example.com/api/dial",
|
||||
"hold_music_url": "https://example.com/static/ringtone.mp3",
|
||||
"timeout_config": {
|
||||
"message": "No agent is available right now"
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
The API will return a static SIP URI (`sip_uri`) that can be called
|
||||
from other SIP services.
|
||||
|
||||
### `room_creation_api`
|
||||
|
||||
To make and receive calls currently you have to host a server that
|
||||
handles incoming calls. In the coming weeks, incoming calls will be
|
||||
directly handled within Daily and we will expose an endpoint similar
|
||||
to `{service}/start` that will manage this for you.
|
||||
|
||||
In the meantime, the server described below serves as the webhook
|
||||
handler for the `room_creation_api`. Configure your pinless phone
|
||||
number or SIP interconnect to the `ngrok` tunnel or
|
||||
the actual server URL, and append `/api/dial` to the webhook URL.
|
||||
|
||||
## Example curl commands
|
||||
|
||||
Note: Replace `http://localhost:3000` with your actual server URL and
|
||||
phone numbers with valid values for your use case.
|
||||
|
||||
### Dialin Request
|
||||
|
||||
The server will receive a request when a call is received from Daily.
|
||||
|
||||
### Dialout Request
|
||||
|
||||
Dial a number. When no `callerId` is given, any purchased number will be used:
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:3000/api/dial \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"dialout_settings": [
|
||||
{
|
||||
"phoneNumber": "+1234567890"
|
||||
}
|
||||
]
|
||||
}'
|
||||
```
|
||||
|
||||
Dial a number with callerId, which is the UUID of a purchased number.
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:3000/api/dial \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"dialout_settings": [
|
||||
{
|
||||
"phoneNumber": "+1234567890",
|
||||
"callerId": "purchased_phone_uuid"
|
||||
}
|
||||
]
|
||||
}'
|
||||
```
|
||||
|
||||
Dial a number
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:3000/api/dial \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"dialout_settings": [
|
||||
{
|
||||
"phoneNumber": "+1234567890",
|
||||
"callerId": "purchased_phone_uuid"
|
||||
}
|
||||
]
|
||||
}'
|
||||
```
|
||||
|
||||
### Advanced Request with Voicemail Detection
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:3000/api/dial \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"To": "+1234567890",
|
||||
"From": "+1987654321",
|
||||
"callId": "call-uuid-123",
|
||||
"callDomain": "domain-uuid-456",
|
||||
"dialout_settings": [
|
||||
{
|
||||
"phoneNumber": "+1234567890",
|
||||
"callerId": "purchased_phone_uuid"
|
||||
}
|
||||
],
|
||||
"voicemail_detection": {
|
||||
"testInPrebuilt": true
|
||||
},
|
||||
"call_transfer": {
|
||||
"mode": "dialout",
|
||||
"speakSummary": true,
|
||||
"storeSummary": true,
|
||||
"operatorNumber": "+1234567890",
|
||||
"testInPrebuilt": true
|
||||
}
|
||||
}'
|
||||
```
|
||||
@@ -0,0 +1,98 @@
|
||||
# FastAPI server for handling Daily PSTN/SIP Webhook
|
||||
|
||||
A FastAPI server that handles PSTN (Public Switched Telephone Network) and SIP (Session Initiation Protocol) calls using the Daily API.
|
||||
|
||||
## Setup
|
||||
|
||||
1. Clone the repository
|
||||
|
||||
2. Navigate to the `fastapi-webhook-server` directory:
|
||||
|
||||
```bash
|
||||
cd fastapi-webhook-server
|
||||
```
|
||||
|
||||
3. Install dependencies:
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
4. Copy `env.example` to `.env`:
|
||||
|
||||
```bash
|
||||
cp env.example .env
|
||||
```
|
||||
|
||||
5. Update `.env` with your credentials:
|
||||
|
||||
- `AGENT_NAME`: Your Daily agent name
|
||||
- `PIPECAT_CLOUD_API_KEY`: Your Pipecat Cloud API key (begins with `pk_`)
|
||||
- `PINLESS_HMAC_SECRET`: Your HMAC secret for request verification
|
||||
|
||||
## Running the Server
|
||||
|
||||
Start the server:
|
||||
|
||||
```bash
|
||||
python server.py
|
||||
```
|
||||
|
||||
The server will run on `http://localhost:7860` and you can expose it via ngrok for testing:
|
||||
|
||||
```bash
|
||||
ngrok http 7860
|
||||
```
|
||||
|
||||
> Tip: Use a subdomain for a consistent URL (e.g. `ngrok http -subdomain=mydomain http://localhost:7860`)
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### GET /
|
||||
|
||||
Health check endpoint that returns a "Hello, World!" message.
|
||||
|
||||
### POST /api/dial
|
||||
|
||||
Initiates a PSTN/SIP call with the following request body format:
|
||||
|
||||
```json
|
||||
{
|
||||
"To": "+14152251493",
|
||||
"From": "+14158483432",
|
||||
"callId": "string-contains-uuid",
|
||||
"callDomain": "string-contains-uuid",
|
||||
"dialout_settings": [
|
||||
{
|
||||
"phoneNumber": "+14158483432",
|
||||
"callerId": "+14152251493"
|
||||
}
|
||||
],
|
||||
"voicemail_detection": {
|
||||
"testInPrebuilt": true
|
||||
},
|
||||
"call_transfer": {
|
||||
"mode": "dialout",
|
||||
"speakSummary": true,
|
||||
"storeSummary": true,
|
||||
"operatorNumber": "+14152250006",
|
||||
"testInPrebuilt": true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Response
|
||||
|
||||
Returns a JSON object containing:
|
||||
|
||||
- `status`: Success/failure status
|
||||
- `data`: Response from Daily API
|
||||
- `room_properties`: Properties of the created Daily room
|
||||
|
||||
## Error Handling
|
||||
|
||||
- 401: Invalid signature
|
||||
- 400: Invalid authorization header (e.g. missing Daily API key in bot.py)
|
||||
- 405: Method not allowed (e.g. incorrect route on the webhook URL)
|
||||
- 500: Server errors (missing API key, network issues)
|
||||
- Other status codes are passed through from the Daily API
|
||||
@@ -0,0 +1,3 @@
|
||||
AGENT_NAME="your-agent-name"
|
||||
PIPECAT_CLOUD_API_KEY="your-pipecat-cloud-api-key"
|
||||
PINLESS_HMAC_SECRET="hmac-secret-pinless-dialin"
|
||||
@@ -0,0 +1,6 @@
|
||||
fastapi
|
||||
uvicorn
|
||||
python-dotenv
|
||||
requests
|
||||
pydantic
|
||||
loguru
|
||||
@@ -0,0 +1,202 @@
|
||||
#
|
||||
# Copyright (c) 2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
# server.py
|
||||
|
||||
|
||||
import base64 # for calculating hmac signature
|
||||
import hmac
|
||||
import os # for accessing environment variables
|
||||
import time # for setting expiration time
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import requests
|
||||
from dotenv import load_dotenv
|
||||
from fastapi import FastAPI, HTTPException, Request
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
|
||||
class RoomRequest(BaseModel):
    """Request body accepted by the ``POST /api/dial`` endpoint.

    Every field is optional and defaults to ``None``. The handler treats the
    request as a dial-in only when ``To``, ``From``, ``callId`` and
    ``callDomain`` are all present, and as a test ping when ``test`` equals
    ``"test"``.
    """

    # Webhook test marker; the handler short-circuits when this equals "test".
    test: Optional[str] = Field(None, alias="Test", description="Test field")
    # Dial-in call details. Each field also declares a lower-case/snake_case alias.
    To: Optional[str] = Field(None, alias="to", description="Destination phone number")
    From: Optional[str] = Field(None, alias="from", description="Source phone number")
    callId: Optional[str] = Field(None, alias="call_id", description="Unique call identifier")
    callDomain: Optional[str] = Field(
        None, alias="call_domain", description="Call domain identifier"
    )
    # Free-form settings forwarded unchanged in the body sent to the agent.
    dialout_settings: Optional[List[Dict[str, Any]]] = Field(
        None, description="An array of phone numbers or SIP URIs to dialout to"
    )
    voicemail_detection: Optional[Dict[str, Any]] = Field(
        None, description="A flag to perform voicemail or answering-machine detection"
    )
    call_transfer: Optional[Dict[str, Any]] = Field(None, description="to initiate a call transfer")

    class Config:
        # Allow fields to be populated by their declared name as well as by alias,
        # so both "To" and "to" (etc.) are accepted in the incoming JSON.
        populate_by_name = True
        alias_generator = None
|
||||
|
||||
|
||||
"""
|
||||
body can contain any fields, but for handling PSTN/SIP,
|
||||
we recommend sending the following custom values:
|
||||
dialin, dialout, voicemail detection, and call transfer
|
||||
|
||||
|
||||
"To": "+14152251493",
|
||||
"From": "+14158483432",
|
||||
"callId": "string-contains-uuid",
|
||||
"callDomain": "string-contains-uuid"
|
||||
These need to be remapped to dialin_settings
|
||||
|
||||
"dialout_settings": [
|
||||
{"phoneNumber": "+14158483432", "callerId": "+14152251493"},
|
||||
{"sipUri": "sip:username@sip.hostname"}
|
||||
],
|
||||
},
|
||||
|
||||
"voicemail_detection": {
|
||||
"testInPrebuilt": true
|
||||
},
|
||||
|
||||
"call_transfer": {
|
||||
"mode": "dialout",
|
||||
"speakSummary": true,
|
||||
"storeSummary": true,
|
||||
"operatorNumber": "+14152250006",
|
||||
"testInPrebuilt": true
|
||||
}
|
||||
"""
|
||||
|
||||
|
||||
@app.get("/")
async def read_root():
    """Answer requests to the root path with a fixed greeting payload."""
    greeting = {"message": "Hello, World!"}
    return greeting
|
||||
|
||||
|
||||
@app.post("/api/dial")
async def dial(request: RoomRequest, raw_request: Request):
    """Validate a dial webhook and start a Pipecat Cloud agent session.

    Steps:
      1. Log the incoming request.
      2. If ``PINLESS_HMAC_SECRET`` is set and the request carries the
         ``x-pinless-timestamp`` / ``x-pinless-signature`` headers, verify the
         HMAC-SHA256 signature of ``"<timestamp>.<raw body>"``.
      3. Answer test pings (``test == "test"``) immediately.
      4. Build Daily room properties (dial-out flag, SIP dial-in config,
         5-minute expiry) and POST them to the agent's ``/start`` endpoint.

    Args:
        request: Parsed request body.
        raw_request: The underlying request, used for headers and the raw body
            (the signature is computed over the exact bytes received).

    Returns:
        A dict with ``status``, the upstream ``data`` and the ``room_properties``
        that were sent, or a short success message for test pings.

    Raises:
        HTTPException: 401 on signature mismatch; 500 when
            ``PIPECAT_CLOUD_API_KEY`` is unset or the upstream request fails
            without an HTTP response; otherwise the upstream HTTP status code
            is passed through.
    """
    logger.info("Incoming request to /dial:")
    logger.info(f"Headers: {dict(raw_request.headers)}")
    raw_body = await raw_request.body()
    raw_body_str = raw_body.decode()
    logger.info(f"Raw body: {raw_body_str}")
    logger.info(f"Parsed body: {request.dict()}")

    # calculate signature and compare/verify
    hmac_secret = os.getenv("PINLESS_HMAC_SECRET")
    timestamp = raw_request.headers.get("x-pinless-timestamp")
    signature = raw_request.headers.get("x-pinless-signature")

    if not hmac_secret:
        logger.debug("Skipping HMAC validation - PINLESS_HMAC_SECRET not set")
    elif timestamp and signature:
        message = timestamp + "." + raw_body_str

        base64_decoded_secret = base64.b64decode(hmac_secret)
        computed_signature = base64.b64encode(
            hmac.new(base64_decoded_secret, message.encode(), "sha256").digest()
        ).decode()

        # Constant-time comparison avoids leaking how many leading characters
        # matched. The computed (valid) signature is deliberately not logged.
        if not hmac.compare_digest(computed_signature.encode(), signature.encode()):
            logger.error("Invalid signature on incoming request")
            raise HTTPException(status_code=401, detail="Invalid signature")
    else:
        # NOTE(review): requests without signature headers are accepted even
        # when a secret is configured (presumably so dial-out calls from your
        # own client work) - confirm this is intended before exposing publicly.
        logger.debug("Skipping HMAC validation - no signature headers present")

    if request.test == "test":
        logger.debug("Test request received")
        return {"status": "success", "message": "Test request received"}

    dialin_settings = None
    # these fields are camelCase in the request
    required_fields = ["To", "From", "callId", "callDomain"]
    if all(getattr(request, field) is not None for field in required_fields):
        # transform from camelCase to snake_case because daily-python expects snake_case
        dialin_settings = {
            "From": request.From,
            "To": request.To,
            "call_id": request.callId,
            "call_domain": request.callDomain,
        }
        logger.debug(f"Populated dialin_settings from request: {dialin_settings}")

    daily_room_properties = {
        "enable_dialout": request.dialout_settings is not None,
    }

    if dialin_settings is not None:
        sip_config = {
            "display_name": request.From,
            "sip_mode": "dial-in",
            # A second SIP endpoint is requested when a call transfer is configured.
            "num_endpoints": 2 if request.call_transfer is not None else 1,
            "codecs": {"audio": ["OPUS"]},
        }
        daily_room_properties["sip"] = sip_config

    # Setting default expiry to 5 minutes from now
    daily_room_properties["exp"] = int(time.time()) + (5 * 60)

    logger.debug(f"Daily room properties: {daily_room_properties}")
    payload = {
        "createDailyRoom": True,
        "dailyRoomProperties": daily_room_properties,
        "body": {
            "dialin_settings": dialin_settings,
            "dialout_settings": request.dialout_settings,
            "voicemail_detection": request.voicemail_detection,
            "call_transfer": request.call_transfer,
        },
    }

    pcc_api_key = os.getenv("PIPECAT_CLOUD_API_KEY")
    agent_name = os.getenv("AGENT_NAME", "my-first-agent")

    if not pcc_api_key:
        raise HTTPException(
            status_code=500, detail="PIPECAT_CLOUD_API_KEY environment variable is not set"
        )

    headers = {"Authorization": f"Bearer {pcc_api_key}", "Content-Type": "application/json"}

    url = f"https://api.pipecat.daily.co/v1/public/{agent_name}/start"

    # Headers are not logged: they carry the bearer API key.
    logger.debug(f"Making API call to Daily: {url} {payload}")

    try:
        # requests has no default timeout; without one a stalled upstream would
        # block this handler indefinitely.
        response = requests.post(url, json=payload, headers=headers, timeout=30)
        response.raise_for_status()
        response_data = response.json()
        logger.debug(f"Response: {response_data}")
        return {
            "status": "success",
            "data": response_data,
            "room_properties": daily_room_properties,
        }
    except requests.exceptions.HTTPError as e:
        # Pass through the status code and error details from the Daily API
        status_code = e.response.status_code
        try:
            error_detail = e.response.json() if e.response.content else str(e)
        except ValueError:
            # Error body was not JSON (e.g. an HTML gateway page); keep the
            # upstream status and return the body as text.
            error_detail = e.response.text
        logger.error(f"HTTP error: {error_detail}")
        raise HTTPException(status_code=status_code, detail=error_detail)
    except requests.exceptions.RequestException as e:
        logger.error(f"Request error: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: serve the FastAPI app with uvicorn on port 7860,
    # bound to all interfaces.
    try:
        import uvicorn

        uvicorn.run(app, port=7860, host="0.0.0.0")
    except KeyboardInterrupt:
        # A keyboard interrupt is reported as a manual stop rather than an error.
        logger.info("Server stopped manually")
|
||||
53
examples/deployment/pipecat-cloud-daily-pstn-server/nextjs-webhook-server/.gitignore
vendored
Normal file
@@ -0,0 +1,53 @@
|
||||
# dependencies
|
||||
/node_modules
|
||||
/.pnp
|
||||
.pnp.js
|
||||
|
||||
# testing
|
||||
/coverage
|
||||
|
||||
# next.js
|
||||
/.next/
|
||||
/out/
|
||||
|
||||
# production
|
||||
/build
|
||||
|
||||
# misc
|
||||
.DS_Store
|
||||
*.pem
|
||||
|
||||
# debug
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# local env files
|
||||
.env*.local
|
||||
|
||||
# vercel
|
||||
.vercel
|
||||
|
||||
# typescript
|
||||
*.tsbuildinfo
|
||||
next-env.d.ts
|
||||
|
||||
# IDE specific files
|
||||
.idea/
|
||||
.vscode/
|
||||
*.swp
|
||||
*.swo
|
||||
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
|
||||
# OS generated files
|
||||
.DS_Store
|
||||
.DS_Store?
|
||||
._*
|
||||
.Spotlight-V100
|
||||
.Trashes
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
@@ -0,0 +1,115 @@
|
||||
# Next.js server for handling Daily PSTN/SIP Webhook
|
||||
|
||||
Next.js API routes for handling Daily PSTN/SIP Pipecat requests.
|
||||
|
||||
## Features
|
||||
|
||||
- API endpoint for handling Daily PSTN/SIP Pipecat requests
|
||||
- HMAC signature validation
|
||||
- Structured logging with Pino
|
||||
- Support for dial-in and dial-out settings
|
||||
- Voicemail detection and call transfer functionality
|
||||
- Test request handling
|
||||
|
||||
## Setup
|
||||
|
||||
1. Clone the repository
|
||||
|
||||
2. Navigate to the `nextjs-webhook-server` directory:
|
||||
|
||||
```bash
|
||||
cd nextjs-webhook-server
|
||||
```
|
||||
|
||||
3. Install dependencies:
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
4. Create `.env.local` file with your credentials:
|
||||
|
||||
```bash
|
||||
cp env.local.example .env.local
|
||||
```
|
||||
|
||||
5. Update your `.env.local` with your secrets:
|
||||
|
||||
```bash
|
||||
PIPECAT_CLOUD_API_KEY=pk_*
|
||||
AGENT_NAME=my-first-agent
|
||||
PINLESS_HMAC_SECRET=your_hmac_secret
|
||||
LOG_LEVEL=info
|
||||
```
|
||||
|
||||
### Running the server
|
||||
|
||||
Run the development server:
|
||||
|
||||
```bash
|
||||
npm run dev
|
||||
```
|
||||
|
||||
The server will run on `http://localhost:7860` and you can expose it via ngrok for testing:
|
||||
|
||||
```bash
|
||||
ngrok http 7860
|
||||
```
|
||||
|
||||
> Tip: Use a subdomain for a consistent URL (e.g. `ngrok http -subdomain=mydomain http://localhost:7860`)
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### GET /api
|
||||
|
||||
Returns a simple "Hello, World!" message with a cute cat emoji to verify the server is running.
|
||||
|
||||
### POST /api/dial
|
||||
|
||||
Handles dial-in and dial-out requests for Pipecat Cloud.
|
||||
|
||||
#### Test Requests
|
||||
|
||||
The endpoint handles test requests when a webhook is configured. Send a request with `"Test": "test"` to verify your setup:
|
||||
|
||||
```json
|
||||
{
|
||||
"Test": "test"
|
||||
}
|
||||
```
|
||||
|
||||
#### Production Request Format
|
||||
|
||||
```json
|
||||
{
|
||||
// for dial-in from webhook
|
||||
"To": "+14152251493",
|
||||
"From": "+14158483432",
|
||||
"callId": "string-contains-uuid",
|
||||
"callDomain": "string-contains-uuid",
|
||||
// for making a dial out to a phone or SIP
|
||||
"dialout_settings": [
|
||||
{ "phoneNumber": "+14158483432", "callerId": "purchased_phone_uuid" },
|
||||
{ "sipUri": "sip:username@sip.hostname.com" }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Deployment
|
||||
|
||||
The application is configured for Vercel deployment:
|
||||
|
||||
1. Push your code to a Git repository
|
||||
2. Import your project in Vercel dashboard
|
||||
3. Configure environment variables:
|
||||
- `PIPECAT_CLOUD_API_KEY`
|
||||
- `AGENT_NAME`
|
||||
- `PINLESS_HMAC_SECRET`
|
||||
- `LOG_LEVEL` (optional, defaults to 'info')
|
||||
4. Deploy!
|
||||
|
||||
## Security
|
||||
|
||||
- HMAC signature validation for request authentication
|
||||
- Environment variables for sensitive credentials
|
||||
- Method validation (POST only for `/api/dial`)
|
||||
@@ -0,0 +1,4 @@
|
||||
AGENT_NAME=my-first-agent
|
||||
PIPECAT_CLOUD_API_KEY=your_pipecat_cloud_api_key
|
||||
PINLESS_HMAC_SECRET=your_hmac_secret
|
||||
LOG_LEVEL="info"
|
||||
5447
examples/deployment/pipecat-cloud-daily-pstn-server/nextjs-webhook-server/package-lock.json
generated
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"name": "my-daily-app",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev -p 7860",
|
||||
"build": "next build",
|
||||
"start": "next start -p 7860",
|
||||
"lint": "next lint"
|
||||
},
|
||||
"dependencies": {
|
||||
"axios": "^1.6.0",
|
||||
"next": "^14.0.0",
|
||||
"pino": "^8.15.0",
|
||||
"react": "^18.2.0",
|
||||
"react-dom": "^18.2.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"eslint": "^8.46.0",
|
||||
"eslint-config-next": "^14.0.0"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,176 @@
|
||||
import { logger } from '../../lib/utils';
|
||||
import axios from 'axios';
|
||||
import crypto from 'crypto';
|
||||
|
||||
/**
 * Validate the HMAC-SHA256 signature of a webhook request.
 *
 * The expected signature is the base64 digest of `${timestamp}.${body}`
 * keyed with the base64-decoded secret.
 *
 * @param {string} body - Serialized request body that was signed.
 * @param {string} signature - Signature received with the request (base64).
 * @param {string} timestamp - Timestamp received with the request.
 * @param {string} secret - Base64-encoded shared HMAC secret.
 * @returns {boolean} true when the signature matches, or when validation is
 *   skipped because a required field is missing; false on mismatch or when
 *   validation itself fails.
 */
const validateSignature = (body, signature, timestamp, secret) => {
  // Skip if any required fields are missing.
  // NOTE(review): this is fail-open by design (e.g. no secret configured);
  // confirm that is acceptable for production deployments.
  if (!signature || !timestamp || !secret) {
    logger.warn('Missing required fields for HMAC validation');
    return true;
  }

  try {
    const decodedSecret = Buffer.from(secret, 'base64');
    const hmac = crypto.createHmac('sha256', decodedSecret);
    const signatureData = `${timestamp}.${body}`;
    const computedSignature = hmac.update(signatureData).digest('base64');

    logger.debug('Signature validation:', {
      timestamp,
      signatureData: signatureData.substring(0, 50) + '...',
      computedSignature,
      receivedSignature: signature
    });

    // Compare in constant time so the check does not leak how many leading
    // characters matched. timingSafeEqual throws on unequal lengths, so the
    // lengths are checked first.
    const expected = Buffer.from(computedSignature, 'utf8');
    const received = Buffer.from(String(signature), 'utf8');
    return (
      expected.length === received.length &&
      crypto.timingSafeEqual(expected, received)
    );
  } catch (error) {
    // Fail closed: a request whose signature could not be verified must not
    // be treated as authenticated.
    logger.error('Error validating signature:', error);
    return false;
  }
};
|
||||
|
||||
/**
 * POST /api/dial — start a Pipecat Cloud agent for a dial-in or dial-out call.
 *
 * Flow:
 *  1. Reject non-POST methods with 405.
 *  2. Validate the HMAC signature when both signature headers are present.
 *  3. Answer webhook test requests (`"Test": "test"`) with 200.
 *  4. Build Daily room properties (and SIP config for dial-in) and call the
 *     agent's `/start` endpoint, passing its response back to the caller.
 *
 * Errors returned by the start API are passed through with their status
 * code; any other failure results in a 500.
 */
export default async function handler(req, res) {
  // Only allow POST requests
  if (req.method !== 'POST') {
    return res.status(405).json({ error: 'Method not allowed' });
  }

  try {
    logger.info('Incoming request to /api/dial:');
    logger.info(`Headers: ${JSON.stringify(req.headers)}`);

    // NOTE(review): req.body is re-serialized here, so this is not
    // necessarily byte-identical to what the sender signed — confirm the
    // sender's JSON formatting matches JSON.stringify output.
    const rawBody = JSON.stringify(req.body);
    logger.info(`Raw body: ${rawBody}`);

    const signature = req.headers['x-pinless-signature'];
    const timestamp = req.headers['x-pinless-timestamp'];

    if (signature && timestamp) {
      logger.info('Validating HMAC signature');
      if (!validateSignature(rawBody, signature, timestamp, process.env.PINLESS_HMAC_SECRET)) {
        logger.error('Invalid HMAC signature', { signature, timestamp });
        return res.status(401).json({
          error: 'Invalid signature',
          message: 'Invalid HMAC signature'
        });
      }
    } else {
      logger.info('Skipping HMAC validation - no signature headers present');
    }

    // Extract request data
    const {
      Test: test,
      To,
      From,
      callId,
      callDomain,
      dialout_settings,
      voicemail_detection,
      call_transfer
    } = req.body;

    // Handle test requests when a webhook is configured
    if (test === 'test') {
      logger.debug('Test request received');
      return res.status(200).json({ status: 'success', message: 'Test request received' });
    }

    // Process dialin settings: only populated when every dial-in field is present
    let dialin_settings = null;
    const requiredFields = ['To', 'From', 'callId', 'callDomain'];

    if (requiredFields.every(field => req.body[field] !== undefined && req.body[field] !== null)) {
      dialin_settings = {
        // snake_case because pipecat expects this format
        From,
        To,
        call_id: callId,
        call_domain: callDomain,
      };
      logger.debug(`Populated dialin_settings from request: ${JSON.stringify(dialin_settings)}`);
    }

    // Set up Daily room properties
    const daily_room_properties = {
      enable_dialout: dialout_settings !== undefined && dialout_settings !== null,
      exp: Math.floor(Date.now() / 1000) + (5 * 60), // 5 minutes from now
    };

    // Configure SIP if dialin settings are provided
    if (dialin_settings !== null) {
      // A field missing from the body destructures to undefined, not null,
      // so both must count as "no call transfer requested".
      const hasCallTransfer = call_transfer !== undefined && call_transfer !== null;
      const sip_config = {
        display_name: From,
        sip_mode: 'dial-in',
        num_endpoints: hasCallTransfer ? 2 : 1,
        codecs: {"audio": ["OPUS"]},
      };
      daily_room_properties.sip = sip_config;
    }

    // Prepare payload for {service}/start API call
    const payload = {
      createDailyRoom: true,
      dailyRoomProperties: daily_room_properties,
      body: {
        dialin_settings,
        dialout_settings,
        voicemail_detection,
        call_transfer,
      },
    };

    logger.debug(`Daily room properties: ${JSON.stringify(daily_room_properties)}`);

    // Get Pipecat Cloud API key and agent name from environment variables
    const pccApiKey = process.env.PIPECAT_CLOUD_API_KEY;
    const agentName = process.env.AGENT_NAME || 'my-first-agent';

    if (!pccApiKey) {
      throw new Error('PIPECAT_CLOUD_API_KEY environment variable is not set');
    }

    // Set up headers for the Pipecat Cloud start API call
    const headers = {
      'Authorization': `Bearer ${pccApiKey}`,
      'Content-Type': 'application/json',
    };

    const url = `https://api.pipecat.daily.co/v1/public/${agentName}/start`;
    // Never write the bearer token to the logs; log a redacted copy instead.
    const loggableHeaders = { ...headers, 'Authorization': 'Bearer [REDACTED]' };
    logger.debug(`Making API call to Daily: ${url} ${JSON.stringify(loggableHeaders)} ${JSON.stringify(payload)}`);

    try {
      const response = await axios.post(url, payload, { headers });
      logger.debug(`Response: ${JSON.stringify(response.data)}`);

      return res.status(200).json({
        status: 'success',
        data: response.data,
        room_properties: daily_room_properties,
      });
    } catch (error) {
      if (error.response) {
        // Pass through status code and error details from the start API
        const statusCode = error.response.status;
        const errorDetail = error.response.data || error.message;
        logger.error(`HTTP error: ${JSON.stringify(errorDetail)}`);
        return res.status(statusCode).json(errorDetail);
      } else {
        // No response was received (network failure, timeout, ...)
        logger.error(`Request error: ${error.message}`);
        return res.status(500).json({ error: error.message });
      }
    }
  } catch (error) {
    logger.error(`Unexpected error: ${error.message}`);
    return res.status(500).json({ error: 'Internal server error', message: error.message });
  }
}
|
||||
|
||||
// Limit the size of the parsed request body (note: the raw body text is not preserved)
|
||||
export const config = {
|
||||
api: {
|
||||
bodyParser: {
|
||||
sizeLimit: '1mb',
|
||||
},
|
||||
},
|
||||
};
|
||||
@@ -0,0 +1,6 @@
|
||||
import { logger } from '../../lib/utils';
|
||||
|
||||
/**
 * /api — simple liveness endpoint.
 * Logs the hit and replies 200 with a fixed greeting.
 */
export default function handler(req, res) {
  logger.info('Received request to /api');

  const greeting = { message: 'Hello, World! from ᓚᘏᗢ' };
  res.status(200).json(greeting);
}
|
||||
@@ -0,0 +1,6 @@
|
||||
module.exports = {
|
||||
version: 2,
|
||||
buildCommand: "next build",
|
||||
outputDirectory: ".next",
|
||||
cleanUrls: true
|
||||
};
|
||||
@@ -50,9 +50,9 @@ async def main(room_url: str, token: str):
|
||||
token,
|
||||
"bot",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
@@ -61,7 +61,7 @@ async def main(room_url: str, token: str):
|
||||
api_key=os.getenv("CARTESIA_API_KEY"), voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22"
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
|
||||
51
examples/fal-smart-turn/.gitignore
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
build/
|
||||
dist/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
.pytest_cache/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# JavaScript/Node.js
|
||||
node_modules/
|
||||
dist/
|
||||
dist-ssr/
|
||||
*.local
|
||||
.env.local
|
||||
.env.development.local
|
||||
.env.test.local
|
||||
.env.production.local
|
||||
|
||||
# Logs
|
||||
logs/
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
pnpm-debug.log*
|
||||
|
||||
# Editor/IDE
|
||||
.vscode/*
|
||||
!.vscode/extensions.json
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
.DS_Store
|
||||
|
||||
# Project specific
|
||||
runpod.toml
|
||||
152
examples/fal-smart-turn/README.md
Normal file
@@ -0,0 +1,152 @@
|
||||
# Smart Turn Detection Demo
|
||||
|
||||
This demo showcases Pipecat's Smart Turn Detection feature - an advanced conversational turn detection system that uses machine learning to identify when a speaker has finished their turn in a conversation. Unlike basic Voice Activity Detection (VAD) which only detects speech vs. silence, Smart Turn detects natural conversational cues like intonation patterns, pacing, and linguistic signals.
|
||||
|
||||
This demo uses the [pipecat-ai/smart-turn](https://huggingface.co/pipecat-ai/smart-turn) model - an open-source, community-driven conversational turn detection model designed to provide more natural turn-taking in voice interactions. The model is being hosted on Fal's infrastructure for GPU acceleration, offering inference times between 50-60ms.
|
||||
|
||||
In the client UI, you can see the transcription messages along with the smart-turn model's prediction results in real-time.
|
||||
|
||||
## Try the demo
|
||||
|
||||
Try the hosted version of the demo here: https://pcc-smart-turn.vercel.app/.
|
||||
|
||||
## Run the demo locally
|
||||
|
||||
### Run the Server
|
||||
|
||||
1. From the `server` directory, set up and activate your virtual environment:
|
||||
|
||||
```bash
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate # On Windows: venv\Scripts\activate
|
||||
```
|
||||
|
||||
2. Install dependencies:
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
3. Create your .env file and set your env vars:
|
||||
|
||||
```bash
|
||||
cp env.example .env
|
||||
```
|
||||
|
||||
Keys to provide:
|
||||
|
||||
- GOOGLE_API_KEY
|
||||
- CARTESIA_API_KEY
|
||||
- DEEPGRAM_API_KEY
|
||||
- DAILY_API_KEY
|
||||
- FAL_SMART_TURN_API_KEY
|
||||
|
||||
4. Run the server:
|
||||
|
||||
```bash
|
||||
LOCAL=1 python server.py
|
||||
```
|
||||
|
||||
### Run the client
|
||||
|
||||
1. Open a new terminal and navigate to the client directory:
|
||||
|
||||
```bash
|
||||
cd client
|
||||
```
|
||||
|
||||
2. Install dependencies:
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
3. Create your .env.local file:
|
||||
|
||||
```bash
|
||||
cp env.local.example .env.local
|
||||
```
|
||||
|
||||
> Note: No keys need to be modified. `NEXT_PUBLIC_API_BASE_URL` is already configured for local use.
|
||||
|
||||
4. Start the development server:
|
||||
|
||||
```bash
|
||||
npm run dev
|
||||
```
|
||||
|
||||
5. Open [http://localhost:3000](http://localhost:3000) in your browser.
|
||||
|
||||
## Deploy the app
|
||||
|
||||
### Deploy the server to Pipecat Cloud
|
||||
|
||||
1. Navigate to the `server` directory:
|
||||
|
||||
```bash
|
||||
cd server
|
||||
```
|
||||
|
||||
2. You should already have a .env set up from running locally. If not, do that now.
|
||||
|
||||
3. Update your build and deploy scripts.
|
||||
|
||||
- In build.sh, set `DOCKER_USERNAME` and `AGENT_NAME`.
|
||||
- In pcc-deploy.toml, set `image`, which specifies where your Docker image is stored.
|
||||
|
||||
4. Build your Docker image by running the build script:
|
||||
|
||||
```bash
|
||||
./build.sh
|
||||
```
|
||||
|
||||
> Note: This builds, tags and pushes your docker image and assumes Docker Hub is the container registry.
|
||||
|
||||
5. Make sure you have the Pipecat Cloud CLI installed:
|
||||
|
||||
```bash
|
||||
pip install pipecatcloud
|
||||
```
|
||||
|
||||
6. Login via the Pipecat Cloud CLI:
|
||||
|
||||
```bash
|
||||
pcc auth login
|
||||
```
|
||||
|
||||
> Note: If you don't have an account, sign up at https://pipecat.daily.co.
|
||||
|
||||
7. Add a secrets set:
|
||||
|
||||
```bash
|
||||
pcc secrets set pcc-smart-turn-secrets --file .env
|
||||
```
|
||||
|
||||
8. Deploy your agent:
|
||||
|
||||
```bash
|
||||
pcc deploy
|
||||
```
|
||||
|
||||
> Note: This uses your pcc-deploy.toml settings. Modify as needed.
|
||||
|
||||
### Deploy the client to Vercel
|
||||
|
||||
This project uses TypeScript, React, and Next.js, making it a perfect fit for [Vercel](https://vercel.com/).
|
||||
|
||||
- In your client directory, install Vercel's CLI tool: `npm install -g vercel`
|
||||
- Verify it's installed using `vercel --version`
|
||||
- Log in to your Vercel account using `vercel login`
|
||||
- Deploy your client to Vercel using `vercel`
|
||||
|
||||
Follow the Vercel prompts to deploy your project.
|
||||
|
||||
### Test your deployed app
|
||||
|
||||
Now with the client and server deployed, you can join the call using your Vercel URL.
|
||||
|
||||
See the debug information for the Smart Turn data. It prints a log line for each smart-turn inference:
|
||||
|
||||
```
|
||||
Smart Turn: COMPLETE, Probability: 95.3%, Model inference: 65.23ms, Server processing: 82.09ms, End-to-end: 245.43ms
|
||||
```
|
||||
41
examples/fal-smart-turn/client/.gitignore
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
|
||||
|
||||
# dependencies
|
||||
/node_modules
|
||||
/.pnp
|
||||
.pnp.*
|
||||
.yarn/*
|
||||
!.yarn/patches
|
||||
!.yarn/plugins
|
||||
!.yarn/releases
|
||||
!.yarn/versions
|
||||
|
||||
# testing
|
||||
/coverage
|
||||
|
||||
# next.js
|
||||
/.next/
|
||||
/out/
|
||||
|
||||
# production
|
||||
/build
|
||||
|
||||
# misc
|
||||
.DS_Store
|
||||
*.pem
|
||||
|
||||
# debug
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# env files (can opt-in for committing if needed)
|
||||
.env*
|
||||
|
||||
# vercel
|
||||
.vercel
|
||||
|
||||
# typescript
|
||||
*.tsbuildinfo
|
||||
next-env.d.ts
|
||||
3
examples/fal-smart-turn/client/env.local.example
Normal file
@@ -0,0 +1,3 @@
|
||||
NEXT_PUBLIC_API_BASE_URL=http://localhost:7860
|
||||
PIPECAT_CLOUD_API_KEY=
|
||||
AGENT_NAME=pcc-smart-turn
|
||||
16
examples/fal-smart-turn/client/eslint.config.mjs
Normal file
@@ -0,0 +1,16 @@
|
||||
import { dirname } from "path";
|
||||
import { fileURLToPath } from "url";
|
||||
import { FlatCompat } from "@eslint/eslintrc";
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
|
||||
const compat = new FlatCompat({
|
||||
baseDirectory: __dirname,
|
||||
});
|
||||
|
||||
const eslintConfig = [
|
||||
...compat.extends("next/core-web-vitals", "next/typescript"),
|
||||
];
|
||||
|
||||
export default eslintConfig;
|
||||
7
examples/fal-smart-turn/client/next.config.ts
Normal file
@@ -0,0 +1,7 @@
|
||||
import type { NextConfig } from "next";
|
||||
|
||||
const nextConfig: NextConfig = {
|
||||
/* config options here */
|
||||
};
|
||||
|
||||
export default nextConfig;
|
||||
5174
examples/fal-smart-turn/client/package-lock.json
generated
Normal file
28
examples/fal-smart-turn/client/package.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"name": "my-nextjs-app",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
"build": "next build",
|
||||
"start": "next start",
|
||||
"lint": "next lint"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^0.3.5",
|
||||
"@pipecat-ai/client-react": "^0.3.5",
|
||||
"@pipecat-ai/daily-transport": "^0.3.10",
|
||||
"next": "15.3.1",
|
||||
"react": "^19.0.0",
|
||||
"react-dom": "^19.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@eslint/eslintrc": "^3",
|
||||
"@types/node": "^20",
|
||||
"@types/react": "^19",
|
||||
"@types/react-dom": "^19",
|
||||
"eslint": "^9",
|
||||
"eslint-config-next": "15.2.3",
|
||||
"typescript": "^5"
|
||||
}
|
||||
}
|
||||
7
examples/fal-smart-turn/client/public/favicon.svg
Normal file
@@ -0,0 +1,7 @@
|
||||
<svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M3.3088 5.05615C3.64682 4.92779 4.02833 5.02411 4.26653 5.29797L7.36884 8.86461H16.6312L19.7335 5.29797C19.9717 5.02411 20.3532 4.92779 20.6912 5.05615C21.0292 5.18452 21.253 5.51072 21.253 5.87504V13.75H24V15.5H19.5181V8.19909L17.6762 10.3167C17.5115 10.506 17.2738 10.6146 17.0241 10.6146H6.9759C6.72616 10.6146 6.48854 10.506 6.32383 10.3167L4.48193 8.19909V15.5H0V13.75H2.74699V5.87504C2.74699 5.51072 2.97078 5.18452 3.3088 5.05615Z" fill="black"/>
|
||||
<path d="M19.5181 17.25H24V19H19.5181V17.25Z" fill="black"/>
|
||||
<path d="M0 17.25H4.48193V19H0V17.25Z" fill="black"/>
|
||||
<path d="M9.25301 14.3333C9.25301 14.9777 8.73517 15.5 8.09639 15.5C7.4576 15.5 6.93976 14.9777 6.93976 14.3333C6.93976 13.689 7.4576 13.1667 8.09639 13.1667C8.73517 13.1667 9.25301 13.689 9.25301 14.3333Z" fill="black"/>
|
||||
<path d="M17.0602 14.3333C17.0602 14.9777 16.5424 15.5 15.9036 15.5C15.2648 15.5 14.747 14.9777 14.747 14.3333C14.747 13.689 15.2648 13.1667 15.9036 13.1667C16.5424 13.1667 17.0602 13.689 17.0602 14.3333Z" fill="black"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 1.1 KiB |
44
examples/fal-smart-turn/client/src/app/api/connect/route.ts
Normal file
@@ -0,0 +1,44 @@
|
||||
import { NextResponse, NextRequest } from 'next/server';
|
||||
|
||||
/**
 * POST /api/connect — start the Pipecat Cloud agent and return the Daily
 * room URL and token in the shape the RTVI client expects.
 *
 * Responses:
 *  - 200 `{ room_url, token }` on success
 *  - 400 `{ error }` when the request body is not valid JSON
 *  - 500 `{ error }` when configuration is missing or the start call fails
 */
export async function POST(request: NextRequest) {
  // Parse the body inside its own try so a malformed or empty body yields a
  // JSON error response instead of an unhandled exception.
  let MY_CUSTOM_DATA: unknown;
  try {
    ({ MY_CUSTOM_DATA } = await request.json());
  } catch (error) {
    console.error('Invalid request body:', error);
    return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 });
  }

  // Without these the request below would target ".../undefined/start" or
  // send an empty bearer token; fail early with a clear log line instead.
  const agentName = process.env.AGENT_NAME;
  const apiKey = process.env.PIPECAT_CLOUD_API_KEY;
  if (!agentName || !apiKey) {
    console.error(
      'API error: AGENT_NAME and PIPECAT_CLOUD_API_KEY must be set'
    );
    return NextResponse.json(
      { error: 'Failed to start agent' },
      { status: 500 }
    );
  }

  try {
    const response = await fetch(
      `https://api.pipecat.daily.co/v1/public/${agentName}/start`,
      {
        method: 'POST',
        headers: {
          Authorization: `Bearer ${apiKey}`,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          // Create Daily room
          createDailyRoom: true,
          // Optionally set Daily room properties
          dailyRoomProperties: { start_video_off: true },
          // Optionally pass custom data to the bot
          body: { MY_CUSTOM_DATA },
        }),
      }
    );

    if (!response.ok) {
      throw new Error(`API responded with status: ${response.status}`);
    }

    const data = await response.json();

    // Transform the response to match what RTVI client expects
    return NextResponse.json({
      room_url: data.dailyRoom,
      token: data.dailyToken,
    });
  } catch (error) {
    console.error('API error:', error);
    return NextResponse.json(
      { error: 'Failed to start agent' },
      { status: 500 }
    );
  }
}
|
||||
82
examples/fal-smart-turn/client/src/app/globals.css
Normal file
@@ -0,0 +1,82 @@
|
||||
body {
|
||||
margin: 0;
|
||||
padding: 20px;
|
||||
font-family: Arial, sans-serif;
|
||||
background-color: #f0f0f0;
|
||||
}
|
||||
|
||||
.app {
|
||||
max-width: 1200px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
|
||||
.status-bar {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
padding: 10px;
|
||||
background-color: #fff;
|
||||
border-radius: 8px;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
.controls button {
|
||||
padding: 8px 16px;
|
||||
margin-left: 10px;
|
||||
border: none;
|
||||
border-radius: 4px;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
button:disabled {
|
||||
opacity: 0.5;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
.connect-btn {
|
||||
background-color: #4caf50;
|
||||
color: white;
|
||||
}
|
||||
|
||||
.disconnect-btn {
|
||||
background-color: #f44336;
|
||||
color: white;
|
||||
}
|
||||
|
||||
.main-content {
|
||||
background-color: #fff;
|
||||
border-radius: 8px;
|
||||
padding: 20px;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
.bot-container {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.video-container {
|
||||
width: 640px;
|
||||
height: 360px;
|
||||
background-color: #ddd;
|
||||
margin-bottom: 20px;
|
||||
border-radius: 8px;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.video-container video {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
object-fit: cover;
|
||||
}
|
||||
|
||||
.mic-enabled {
|
||||
background-color: #4caf50;
|
||||
color: white;
|
||||
}
|
||||
|
||||
.mic-disabled {
|
||||
background-color: #f44336;
|
||||
color: white;
|
||||
}
|
||||
27
examples/fal-smart-turn/client/src/app/layout.tsx
Normal file
@@ -0,0 +1,27 @@
|
||||
import './globals.css';
|
||||
import { RTVIProvider } from '@/providers/RTVIProvider';
|
||||
|
||||
export const metadata = {
|
||||
title: 'Pipecat React Client',
|
||||
description: 'Pipecat RTVI Client using Next.js',
|
||||
icons: {
|
||||
icon: [{ url: '/favicon.svg', type: 'image/svg+xml' }],
|
||||
},
|
||||
};
|
||||
|
||||
/**
 * Root layout: renders the document shell (favicon link included) and wraps
 * every page in the RTVIProvider.
 */
export default function RootLayout(props: { children: React.ReactNode }) {
  const { children } = props;

  return (
    <html lang="en">
      <head>
        <link rel="icon" href="/favicon.svg" type="image/svg+xml" />
      </head>
      <body>
        <RTVIProvider>{children}</RTVIProvider>
      </body>
    </html>
  );
}
|
||||
41
examples/fal-smart-turn/client/src/app/page.tsx
Normal file
@@ -0,0 +1,41 @@
|
||||
'use client';
|
||||
|
||||
import {
|
||||
RTVIClientAudio,
|
||||
RTVIClientVideo,
|
||||
useRTVIClientTransportState,
|
||||
} from '@pipecat-ai/client-react';
|
||||
import { ConnectButton } from '../components/ConnectButton';
|
||||
import { StatusDisplay } from '../components/StatusDisplay';
|
||||
import { DebugDisplay } from '../components/DebugDisplay';
|
||||
|
||||
/**
 * Video area for the bot. The video element is rendered in every transport
 * state except 'disconnected'.
 */
function BotVideo() {
  const showVideo = useRTVIClientTransportState() !== 'disconnected';

  return (
    <div className="bot-container">
      <div className="video-container">
        {showVideo ? <RTVIClientVideo participant="bot" fit="cover" /> : null}
      </div>
    </div>
  );
}
|
||||
|
||||
/**
 * Home page: status bar (transport status + connect button), the bot video,
 * the debug log panel, and the audio output element.
 */
export default function Home() {
  const statusBar = (
    <div className="status-bar">
      <StatusDisplay />
      <ConnectButton />
    </div>
  );

  const mainContent = (
    <div className="main-content">
      <BotVideo />
    </div>
  );

  return (
    <div className="app">
      {statusBar}
      {mainContent}
      <DebugDisplay />
      <RTVIClientAudio />
    </div>
  );
}
|
||||
@@ -0,0 +1,40 @@
|
||||
import {
|
||||
useRTVIClient,
|
||||
useRTVIClientTransportState,
|
||||
} from '@pipecat-ai/client-react';
|
||||
|
||||
/**
 * Button that toggles the RTVI client connection.
 *
 * Shows "Disconnect" while the transport is 'connected' or 'ready' and
 * "Connect" otherwise. It is disabled when no client is available or while
 * the transport is 'connecting' / 'disconnecting'.
 */
export function ConnectButton() {
  const client = useRTVIClient();
  const transportState = useRTVIClientTransportState();

  const isConnected =
    transportState === 'connected' || transportState === 'ready';
  const isTransitioning =
    transportState === 'connecting' || transportState === 'disconnecting';

  const handleClick = async () => {
    if (!client) {
      console.error('RTVI client is not initialized');
      return;
    }

    try {
      // Toggle: disconnect when connected, connect otherwise.
      await (isConnected ? client.disconnect() : client.connect());
    } catch (error) {
      console.error('Connection error:', error);
    }
  };

  const buttonClass = isConnected ? 'disconnect-btn' : 'connect-btn';
  const buttonLabel = isConnected ? 'Disconnect' : 'Connect';

  return (
    <div className="controls">
      <button
        className={buttonClass}
        onClick={handleClick}
        disabled={!client || isTransitioning}>
        {buttonLabel}
      </button>
    </div>
  );
}
|
||||
@@ -0,0 +1,26 @@
|
||||
.debug-panel {
|
||||
background-color: #fff;
|
||||
border-radius: 8px;
|
||||
padding: 20px;
|
||||
}
|
||||
|
||||
.debug-panel h3 {
|
||||
margin: 0 0 10px 0;
|
||||
font-size: 16px;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.debug-log {
|
||||
height: 200px;
|
||||
overflow-y: auto;
|
||||
background-color: #f8f8f8;
|
||||
padding: 10px;
|
||||
border-radius: 4px;
|
||||
font-family: monospace;
|
||||
font-size: 12px;
|
||||
line-height: 1.4;
|
||||
}
|
||||
|
||||
.debug-log div {
|
||||
margin-bottom: 4px;
|
||||
}
|
||||
171
examples/fal-smart-turn/client/src/components/DebugDisplay.tsx
Normal file
@@ -0,0 +1,171 @@
|
||||
import { useRef, useCallback } from 'react';
|
||||
import {
|
||||
Participant,
|
||||
RTVIEvent,
|
||||
TransportState,
|
||||
TranscriptData,
|
||||
BotLLMTextData,
|
||||
} from '@pipecat-ai/client-js';
|
||||
import { useRTVIClient, useRTVIClientEvent } from '@pipecat-ai/client-react';
|
||||
import './DebugDisplay.css';
|
||||
|
||||
interface SmartTurnResultData {
|
||||
type: 'smart_turn_result';
|
||||
is_complete: boolean;
|
||||
probability: number;
|
||||
inference_time_ms: number; // Pure model inference time
|
||||
server_total_time_ms: number; // Server processing time
|
||||
e2e_processing_time_ms: number; // Complete end-to-end time
|
||||
}
|
||||
|
||||
/**
 * Debug panel that appends a timestamped line to a scrolling log for each
 * RTVI event of interest: transport state changes, bot connect/disconnect,
 * track start/stop, bot ready, final user transcripts, bot transcripts, and
 * smart-turn results sent as server messages.
 */
export function DebugDisplay() {
  const debugLogRef = useRef<HTMLDivElement>(null);
  const client = useRTVIClient();

  // Append one entry to the log element and keep it scrolled to the bottom.
  const log = useCallback((message: string) => {
    if (!debugLogRef.current) return;

    const entry = document.createElement('div');
    entry.textContent = `${new Date().toISOString()} - ${message}`;

    // Add styling based on message type
    if (message.startsWith('User: ')) {
      entry.style.color = '#2196F3'; // blue for user
    } else if (message.startsWith('Bot: ')) {
      entry.style.color = '#4CAF50'; // green for bot
    } else if (message.includes('Smart Turn:')) {
      entry.style.color = '#9C27B0'; // purple for smart turn
    }

    debugLogRef.current.appendChild(entry);
    debugLogRef.current.scrollTop = debugLogRef.current.scrollHeight;
  }, []);

  // Log transport state changes
  useRTVIClientEvent(
    RTVIEvent.TransportStateChanged,
    useCallback(
      (state: TransportState) => {
        log(`Transport state changed: ${state}`);
      },
      [log]
    )
  );

  // Log bot connection events
  useRTVIClientEvent(
    RTVIEvent.BotConnected,
    useCallback(
      (participant?: Participant) => {
        log(`Bot connected: ${JSON.stringify(participant)}`);
      },
      [log]
    )
  );

  useRTVIClientEvent(
    RTVIEvent.BotDisconnected,
    useCallback(
      (participant?: Participant) => {
        log(`Bot disconnected: ${JSON.stringify(participant)}`);
      },
      [log]
    )
  );

  // Log track events
  useRTVIClientEvent(
    RTVIEvent.TrackStarted,
    useCallback(
      (track: MediaStreamTrack, participant?: Participant) => {
        log(
          `Track started: ${track.kind} from ${participant?.name || 'unknown'}`
        );
      },
      [log]
    )
  );

  useRTVIClientEvent(
    RTVIEvent.TrackStopped,
    useCallback(
      (track: MediaStreamTrack, participant?: Participant) => {
        log(
          `Track stopped: ${track.kind} from ${participant?.name || 'unknown'}`
        );
      },
      [log]
    )
  );

  // Log bot ready state and check tracks
  useRTVIClientEvent(
    RTVIEvent.BotReady,
    useCallback(() => {
      log(`Bot ready`);

      if (!client) return;

      const tracks = client.tracks();
      log(
        `Available tracks: ${JSON.stringify({
          local: {
            audio: !!tracks.local.audio,
            video: !!tracks.local.video,
          },
          bot: {
            audio: !!tracks.bot?.audio,
            video: !!tracks.bot?.video,
          },
        })}`
      );
    }, [client, log])
  );

  // Log transcripts
  useRTVIClientEvent(
    RTVIEvent.UserTranscript,
    useCallback(
      (data: TranscriptData) => {
        // Only log final transcripts
        if (data.final) {
          log(`User: ${data.text}`);
        }
      },
      [log]
    )
  );

  useRTVIClientEvent(
    RTVIEvent.BotTranscript,
    useCallback(
      (data: BotLLMTextData) => {
        log(`Bot: ${data.text}`);
      },
      [log]
    )
  );

  // Log smart-turn results delivered as server messages
  useRTVIClientEvent(
    RTVIEvent.ServerMessage,
    useCallback(
      (data: SmartTurnResultData) => {
        // ServerMessage carries arbitrary payloads; only format the ones
        // tagged as smart-turn results, otherwise unrelated messages would
        // be logged as "Probability: NaN%".
        if (data?.type !== 'smart_turn_result') return;

        // Build the entry as a single line so the log text contains no
        // embedded newlines or source indentation.
        const parts = [
          `Smart Turn: ${data.is_complete ? 'COMPLETE' : 'INCOMPLETE'}`,
          `Probability: ${(data.probability * 100).toFixed(1)}%`,
          `Model inference: ${data.inference_time_ms?.toFixed(2) || 'N/A'}ms`,
          `Server processing: ${data.server_total_time_ms?.toFixed(2) || 'N/A'}ms`,
          `End-to-end: ${data.e2e_processing_time_ms?.toFixed(2) || 'N/A'}ms`,
        ];
        log(parts.join(', '));
      },
      [log]
    )
  );

  return (
    <div className="debug-panel">
      <h3>Debug Info</h3>
      <div ref={debugLogRef} className="debug-log" />
    </div>
  );
}
|
||||
@@ -0,0 +1,11 @@
|
||||
import { useRTVIClientTransportState } from '@pipecat-ai/client-react';
|
||||
|
||||
/**
 * One-line status indicator showing the current RTVI transport state.
 */
export function StatusDisplay() {
  const currentState = useRTVIClientTransportState();
  const stateLabel = <span>{currentState}</span>;

  return <div className="status">Status: {stateLabel}</div>;
}
|
||||
@@ -0,0 +1,43 @@
|
||||
'use client';
|
||||
|
||||
import { RTVIClient } from '@pipecat-ai/client-js';
|
||||
import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
import { RTVIClientProvider } from '@pipecat-ai/client-react';
|
||||
import { PropsWithChildren, useEffect, useState } from 'react';
|
||||
|
||||
// Get the API base URL from environment variables
|
||||
// Default to "/api" if not specified
|
||||
// "/api" is the default for Next.js API routes and used
|
||||
// for the Pipecat Cloud deployed agent
|
||||
const API_BASE_URL = process.env.NEXT_PUBLIC_API_BASE_URL || '/api';
|
||||
|
||||
console.log('Using API base URL:', API_BASE_URL);
|
||||
|
||||
/**
 * Builds an RTVIClient (Daily transport, microphone on, camera off) in a
 * mount-time effect and shares it with descendants through
 * RTVIClientProvider. Renders nothing until the client has been stored
 * in state.
 */
export function RTVIProvider({ children }: PropsWithChildren) {
  const [client, setClient] = useState<RTVIClient | null>(null);

  // Empty dependency list: the effect is tied to mount, not to any prop.
  useEffect(() => {
    setClient(
      new RTVIClient({
        transport: new DailyTransport(),
        params: {
          baseUrl: API_BASE_URL,
          endpoints: { connect: '/connect' },
          requestData: { foo: 'bar' },
        },
        enableMic: true,
        enableCam: false,
      })
    );
  }, []);

  return client ? (
    <RTVIClientProvider client={client}>{children}</RTVIClientProvider>
  ) : null;
}
|
||||
28
examples/fal-smart-turn/client/tsconfig.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2017",
|
||||
"lib": ["dom", "dom.iterable", "esnext"],
|
||||
"allowJs": true,
|
||||
"skipLibCheck": true,
|
||||
"strict": true,
|
||||
"noEmit": true,
|
||||
"esModuleInterop": true,
|
||||
"module": "esnext",
|
||||
"moduleResolution": "bundler",
|
||||
"resolveJsonModule": true,
|
||||
"isolatedModules": true,
|
||||
"jsx": "preserve",
|
||||
"incremental": true,
|
||||
"plugins": [
|
||||
{
|
||||
"name": "next"
|
||||
}
|
||||
],
|
||||
"paths": {
|
||||
"@/components/*": ["./src/components/*"],
|
||||
"@/providers/*": ["./src/providers/*"]
|
||||
}
|
||||
},
|
||||
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
|
||||
"exclude": ["node_modules"]
|
||||
}
|
||||
8
examples/fal-smart-turn/server/Dockerfile
Normal file
@@ -0,0 +1,8 @@
|
||||
FROM dailyco/pipecat-base:latest
|
||||
|
||||
COPY ./requirements.txt requirements.txt
|
||||
|
||||
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
||||
|
||||
COPY ./assets assets
|
||||
COPY ./bot.py bot.py
|
||||
BIN
examples/fal-smart-turn/server/assets/robot01.png
Normal file
|
After Width: | Height: | Size: 759 KiB |
BIN
examples/fal-smart-turn/server/assets/robot010.png
Normal file
|
After Width: | Height: | Size: 884 KiB |
BIN
examples/fal-smart-turn/server/assets/robot011.png
Normal file
|
After Width: | Height: | Size: 876 KiB |
BIN
examples/fal-smart-turn/server/assets/robot012.png
Normal file
|
After Width: | Height: | Size: 881 KiB |
BIN
examples/fal-smart-turn/server/assets/robot013.png
Normal file
|
After Width: | Height: | Size: 866 KiB |
BIN
examples/fal-smart-turn/server/assets/robot014.png
Normal file
|
After Width: | Height: | Size: 874 KiB |
BIN
examples/fal-smart-turn/server/assets/robot015.png
Normal file
|
After Width: | Height: | Size: 882 KiB |
BIN
examples/fal-smart-turn/server/assets/robot016.png
Normal file
|
After Width: | Height: | Size: 885 KiB |
BIN
examples/fal-smart-turn/server/assets/robot017.png
Normal file
|
After Width: | Height: | Size: 888 KiB |
BIN
examples/fal-smart-turn/server/assets/robot018.png
Normal file
|
After Width: | Height: | Size: 890 KiB |
BIN
examples/fal-smart-turn/server/assets/robot019.png
Normal file
|
After Width: | Height: | Size: 898 KiB |
BIN
examples/fal-smart-turn/server/assets/robot02.png
Normal file
|
After Width: | Height: | Size: 836 KiB |
BIN
examples/fal-smart-turn/server/assets/robot020.png
Normal file
|
After Width: | Height: | Size: 903 KiB |
BIN
examples/fal-smart-turn/server/assets/robot021.png
Normal file
|
After Width: | Height: | Size: 908 KiB |
BIN
examples/fal-smart-turn/server/assets/robot022.png
Normal file
|
After Width: | Height: | Size: 908 KiB |
BIN
examples/fal-smart-turn/server/assets/robot023.png
Normal file
|
After Width: | Height: | Size: 905 KiB |
BIN
examples/fal-smart-turn/server/assets/robot024.png
Normal file
|
After Width: | Height: | Size: 903 KiB |
BIN
examples/fal-smart-turn/server/assets/robot025.png
Normal file
|
After Width: | Height: | Size: 866 KiB |
BIN
examples/fal-smart-turn/server/assets/robot03.png
Normal file
|
After Width: | Height: | Size: 849 KiB |
BIN
examples/fal-smart-turn/server/assets/robot04.png
Normal file
|
After Width: | Height: | Size: 866 KiB |
BIN
examples/fal-smart-turn/server/assets/robot05.png
Normal file
|
After Width: | Height: | Size: 866 KiB |
BIN
examples/fal-smart-turn/server/assets/robot06.png
Normal file
|
After Width: | Height: | Size: 864 KiB |
BIN
examples/fal-smart-turn/server/assets/robot07.png
Normal file
|
After Width: | Height: | Size: 858 KiB |
BIN
examples/fal-smart-turn/server/assets/robot08.png
Normal file
|
After Width: | Height: | Size: 875 KiB |
BIN
examples/fal-smart-turn/server/assets/robot09.png
Normal file
|
After Width: | Height: | Size: 881 KiB |
299
examples/fal-smart-turn/server/bot.py
Normal file
@@ -0,0 +1,299 @@
|
||||
#
|
||||
# Copyright (c) 2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from PIL import Image
|
||||
from pipecatcloud.agent import DailySessionArguments
|
||||
|
||||
from pipecat.audio.turn.smart_turn.fal_smart_turn import FalSmartTurnAnalyzer
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import (
|
||||
BotStartedSpeakingFrame,
|
||||
BotStoppedSpeakingFrame,
|
||||
Frame,
|
||||
MetricsFrame,
|
||||
OutputImageRawFrame,
|
||||
SpriteFrame,
|
||||
)
|
||||
from pipecat.metrics.metrics import SmartTurnMetricsData
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.frameworks.rtvi import (
|
||||
RTVIConfig,
|
||||
RTVIObserver,
|
||||
RTVIProcessor,
|
||||
RTVIServerMessageFrame,
|
||||
)
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.google.llm import GoogleLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# Check if we're in local development mode
|
||||
LOCAL = os.getenv("LOCAL")
|
||||
|
||||
logger.remove()
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
script_dir = os.path.dirname(__file__)
sprites = []

# Load the 25 animation frames in order. The asset names are a literal
# "robot0" prefix followed by the frame number (robot01.png ... robot025.png).
for i in range(1, 26):
    full_path = os.path.join(script_dir, f"assets/robot0{i}.png")
    # Read the pixel data while the file is open, then keep only the raw frame.
    with Image.open(full_path) as img:
        raw_frame = OutputImageRawFrame(image=img.tobytes(), size=img.size, format=img.format)
    sprites.append(raw_frame)

# Play the sequence forwards and then backwards so the loop has no visible jump.
flipped = sprites[::-1]
sprites += flipped

# Still image shown while the bot is listening.
quiet_frame = sprites[0]
# Full forward/backward sequence shown while the bot is talking.
talking_frame = SpriteFrame(images=sprites)
|
||||
|
||||
|
||||
class TalkingAnimation(FrameProcessor):
    """Drive the bot's avatar from its speaking state.

    Pushes the sprite animation when the bot starts speaking and the still
    frame when it stops. Every incoming frame is forwarded afterwards.
    """

    def __init__(self):
        super().__init__()
        # True once the talking animation has been pushed and not yet replaced.
        self._is_talking = False

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Update the animation for speaking events, then forward the frame.

        Args:
            frame: The incoming frame to process
            direction: The direction of frame flow in the pipeline
        """
        await super().process_frame(frame, direction)

        if isinstance(frame, BotStartedSpeakingFrame):
            await self._show_talking()
        elif isinstance(frame, BotStoppedSpeakingFrame):
            await self._show_quiet()

        await self.push_frame(frame, direction)

    async def _show_talking(self):
        """Push the talking animation unless it is already showing."""
        if self._is_talking:
            return
        await self.push_frame(talking_frame)
        self._is_talking = True

    async def _show_quiet(self):
        """Push the static listening frame and clear the talking flag."""
        await self.push_frame(quiet_frame)
        self._is_talking = False
|
||||
|
||||
|
||||
class SmartTurnMetricsProcessor(FrameProcessor):
    """Relay Smart Turn metrics to the client UI.

    For each ``SmartTurnMetricsData`` entry found in a ``MetricsFrame`` this
    processor logs it and pushes an ``RTVIServerMessageFrame`` carrying the
    prediction and timing values. All frames are forwarded afterwards.
    """

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Emit an RTVI server message per Smart Turn metric, then forward.

        Args:
            frame: The incoming frame to process
            direction: The direction of frame flow in the pipeline
        """
        await super().process_frame(frame, direction)

        if isinstance(frame, MetricsFrame):
            smart_turn_entries = (
                entry for entry in frame.data if isinstance(entry, SmartTurnMetricsData)
            )
            for metrics in smart_turn_entries:
                logger.info(f"Smart Turn metrics: {metrics}")
                payload = self._as_payload(metrics)
                await self.push_frame(RTVIServerMessageFrame(data=payload))

        await self.push_frame(frame, direction)

    @staticmethod
    def _as_payload(metrics):
        """Build the ``smart_turn_result`` message body from one metrics entry."""
        return {
            "type": "smart_turn_result",
            "is_complete": metrics.is_complete,
            "probability": metrics.probability,
            "inference_time_ms": metrics.inference_time_ms,
            "server_total_time_ms": metrics.server_total_time_ms,
            "e2e_processing_time_ms": metrics.e2e_processing_time_ms,
        }
|
||||
|
||||
|
||||
async def main(transport: DailyTransport):
    """Assemble the voice pipeline on ``transport`` and run it to completion.

    Builds the STT, LLM and TTS services, wires them into a pipeline together
    with the RTVI processor, the Smart Turn metrics relay and the talking
    animation, registers the RTVI/transport event handlers, and then runs
    the pipeline task.

    Args:
        transport: An already-configured Daily transport to read from and
            write to.
    """
    # Swap these services or their settings to customize the bot.
    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
    llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"))
    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )

    # Initial conversation context. You can specify additional system or
    # assistant messages here.
    system_message = {
        "role": "system",
        "content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by introducing yourself.",
    }
    context = OpenAILLMContext([system_message])
    context_aggregator = llm.create_context_aggregator(context)

    talking_animation = TalkingAnimation()
    smart_turn_metrics_processor = SmartTurnMetricsProcessor()

    # RTVI events for the Pipecat client UI.
    rtvi = RTVIProcessor(config=RTVIConfig(config=[]))

    # Core voice pipeline, listed in the order the processors are chained.
    processors = [
        transport.input(),
        rtvi,
        smart_turn_metrics_processor,
        stt,
        context_aggregator.user(),
        llm,
        tts,
        talking_animation,
        transport.output(),
        context_aggregator.assistant(),
    ]
    pipeline = Pipeline(processors)

    task_params = PipelineParams(
        allow_interruptions=True,
        enable_metrics=True,
        enable_usage_metrics=True,
    )
    task = PipelineTask(pipeline, params=task_params, observers=[RTVIObserver(rtvi)])

    @rtvi.event_handler("on_client_ready")
    async def on_client_ready(rtvi):
        logger.debug("Client ready event received")
        await rtvi.set_bot_ready()
        # Queue the current context so the bot opens the conversation.
        opening_frame = context_aggregator.user().get_context_frame()
        await task.queue_frames([opening_frame])

    @transport.event_handler("on_first_participant_joined")
    async def on_first_participant_joined(transport, participant):
        logger.info("First participant joined: {}", participant["id"])
        # Show the still frame so the bot appears to be listening.
        await task.queue_frame(quiet_frame)

    @transport.event_handler("on_participant_left")
    async def on_participant_left(transport, participant, reason):
        logger.info("Participant left: {}", participant)
        await task.cancel()

    await PipelineRunner(handle_sigint=False, force_gc=True).run(task)
|
||||
|
||||
|
||||
async def bot(args: DailySessionArguments):
    """Bot entry point taking a ``DailySessionArguments`` session description.

    Creates a Daily transport for the session (Krisp noise filter, Silero VAD
    and the Fal Smart Turn analyzer enabled) and runs ``main`` on it. The
    aiohttp session used by the turn analyzer stays open for the whole run.

    Args:
        args: Session arguments; ``room_url`` and ``token`` are the only
            attributes read here.

    Raises:
        Exception: Any error raised while running the bot is logged with its
            traceback and re-raised.
    """
    # Imported lazily so that importing this module does not require Krisp.
    from pipecat.audio.filters.krisp_filter import KrispFilter

    # The meeting token is a credential: log the room URL only.
    logger.info(f"Bot process initialized {args.room_url}")

    async with aiohttp.ClientSession() as session:
        transport = DailyTransport(
            args.room_url,
            args.token,
            "Smart Turn Bot",
            params=DailyParams(
                audio_in_enabled=True,
                audio_in_filter=KrispFilter(),
                audio_out_enabled=True,
                video_out_enabled=True,
                video_out_width=1024,
                video_out_height=576,
                vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
                turn_analyzer=FalSmartTurnAnalyzer(
                    api_key=os.getenv("FAL_SMART_TURN_API_KEY"), aiohttp_session=session
                ),
            ),
        )

        try:
            await main(transport)
            logger.info("Bot process completed")
        except Exception as e:
            logger.exception(f"Error in bot process: {str(e)}")
            raise
|
||||
|
||||
|
||||
# Local development
|
||||
async def local_daily():
    """Run the bot against a Daily room resolved by ``runner.configure``.

    Used for local development. Any exception is logged with its traceback
    and not re-raised.
    """
    from runner import configure

    try:
        async with aiohttp.ClientSession() as session:
            room_url, token = await configure(session)

            daily_params = DailyParams(
                audio_in_enabled=True,
                audio_out_enabled=True,
                video_out_enabled=True,
                video_out_width=1024,
                video_out_height=576,
                vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
                turn_analyzer=FalSmartTurnAnalyzer(
                    api_key=os.getenv("FAL_SMART_TURN_API_KEY"), aiohttp_session=session
                ),
            )
            transport = DailyTransport(room_url, token, "Smart Turn Bot", params=daily_params)

            await main(transport)
    except Exception as exc:
        logger.exception(f"Error in local development mode: {exc}")
|
||||
|
||||
|
||||
# Local development entry point
|
||||
# Only start the local runner when executed as a script with LOCAL set.
if __name__ == "__main__" and LOCAL:
    try:
        asyncio.run(local_daily())
    except Exception as exc:
        logger.exception(f"Failed to run in local mode: {exc}")
|
||||
19
examples/fal-smart-turn/server/build.sh
Executable file
@@ -0,0 +1,19 @@
|
||||
#!/bin/bash
# Build the agent image for linux/arm64, tag it with $VERSION and "latest",
# and push both tags under the $DOCKER_USERNAME namespace.
set -e

VERSION="0.1"
# Registry namespace. Set it here or export DOCKER_USERNAME before running.
DOCKER_USERNAME="${DOCKER_USERNAME:-}"
AGENT_NAME="pcc-smart-turn"

# With an empty namespace the image name would be "/$AGENT_NAME", which is
# not a usable repository name, so stop before invoking docker.
if [ -z "$DOCKER_USERNAME" ]; then
  echo "Error: DOCKER_USERNAME is empty. Set it in build.sh or in the environment." >&2
  exit 1
fi

# Build the Docker image with the correct context
echo "Building Docker image..."
docker build --platform=linux/arm64 -t "$DOCKER_USERNAME/$AGENT_NAME:$VERSION" -t "$DOCKER_USERNAME/$AGENT_NAME:latest" .

# Push the Docker images
echo "Pushing Docker image $DOCKER_USERNAME/$AGENT_NAME:$VERSION..."
docker push "$DOCKER_USERNAME/$AGENT_NAME:$VERSION"

echo "Pushing Docker image $DOCKER_USERNAME/$AGENT_NAME:latest..."
docker push "$DOCKER_USERNAME/$AGENT_NAME:latest"

echo "Successfully built and pushed $DOCKER_USERNAME/$AGENT_NAME:$VERSION and $DOCKER_USERNAME/$AGENT_NAME:latest"
|
||||
5
examples/fal-smart-turn/server/env.example
Normal file
@@ -0,0 +1,5 @@
|
||||
GOOGLE_API_KEY=
|
||||
CARTESIA_API_KEY=
|
||||
DEEPGRAM_API_KEY=
|
||||
DAILY_API_KEY=
|
||||
FAL_SMART_TURN_API_KEY=
|
||||
7
examples/fal-smart-turn/server/pcc-deploy.toml
Normal file
@@ -0,0 +1,7 @@
|
||||
agent_name = "pcc-smart-turn"
|
||||
image = "your-username/pcc-smart-turn:0.1"
|
||||
secret_set = "pcc-smart-turn-secrets"
|
||||
enable_krisp = true
|
||||
|
||||
[scaling]
|
||||
min_instances = 0
|
||||
3
examples/fal-smart-turn/server/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
pipecatcloud
|
||||
pipecat-ai[google,daily,deepgram,cartesia,silero]
|
||||
python-dotenv
|
||||
56
examples/fal-smart-turn/server/runner.py
Normal file
@@ -0,0 +1,56 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
|
||||
|
||||
|
||||
async def configure(aiohttp_session: aiohttp.ClientSession):
    """Resolve the Daily room URL and create a meeting token for it.

    The room URL comes from ``-u/--url`` or ``DAILY_SAMPLE_ROOM_URL``; the API
    key comes from ``-k/--apikey`` or ``DAILY_API_KEY``. Unrecognized command
    line arguments are ignored.

    Args:
        aiohttp_session: HTTP session handed to the Daily REST helper.

    Returns:
        A ``(url, token)`` tuple.

    Raises:
        Exception: If no room URL or no API key could be determined.
    """
    parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
    parser.add_argument(
        "-u", "--url", type=str, required=False, help="URL of the Daily room to join"
    )
    parser.add_argument(
        "-k",
        "--apikey",
        type=str,
        required=False,
        help="Daily API Key (needed to create an owner token for the room)",
    )
    cli_args, _unknown = parser.parse_known_args()

    # Command line values win over the environment.
    room_url = cli_args.url or os.getenv("DAILY_SAMPLE_ROOM_URL")
    api_key = cli_args.apikey or os.getenv("DAILY_API_KEY")

    if not room_url:
        raise Exception(
            "No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
        )
    if not api_key:
        raise Exception(
            "No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
        )

    rest_helper = DailyRESTHelper(
        daily_api_key=api_key,
        daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
        aiohttp_session=aiohttp_session,
    )

    # Token lifetime passed to the helper: one hour, in seconds.
    one_hour: float = 60 * 60
    token = await rest_helper.get_token(room_url, one_hour)

    return (room_url, token)
|
||||
228
examples/fal-smart-turn/server/server.py
Normal file
@@ -0,0 +1,228 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""RTVI Bot Server Implementation.
|
||||
|
||||
This FastAPI server manages RTVI bot instances and provides endpoints for both
|
||||
direct browser access and RTVI client connections. It handles:
|
||||
- Creating Daily rooms
|
||||
- Managing bot processes
|
||||
- Providing connection credentials
|
||||
- Monitoring bot status
|
||||
|
||||
Requirements:
|
||||
- Daily API key (set in .env file)
|
||||
- Python 3.10+
|
||||
- FastAPI
|
||||
- Running bot implementation
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import Any, Dict
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from fastapi import FastAPI, HTTPException, Request
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse, RedirectResponse
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
|
||||
|
||||
# Load environment variables from .env file
|
||||
load_dotenv(override=True)
|
||||
|
||||
# Maximum number of bot instances allowed per room
|
||||
MAX_BOTS_PER_ROOM = 1
|
||||
|
||||
# Dictionary to track bot processes: {pid: (process, room_url)}
|
||||
bot_procs = {}
|
||||
|
||||
# Store Daily API helpers
|
||||
daily_helpers = {}
|
||||
|
||||
|
||||
def cleanup():
    """Terminate every tracked bot process and wait for each one to exit.

    Called during server shutdown.
    """
    # Each value in bot_procs is a sequence whose first item is the process.
    for proc, *_ in bot_procs.values():
        proc.terminate()
        proc.wait()
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan manager that handles startup and shutdown tasks.

    - Creates the shared aiohttp session
    - Registers the Daily REST helper under ``daily_helpers["rest"]``
    - On shutdown, closes the session and terminates all bot processes

    Args:
        app: The FastAPI application (unused here).
    """
    aiohttp_session = aiohttp.ClientSession()
    try:
        daily_helpers["rest"] = DailyRESTHelper(
            daily_api_key=os.getenv("DAILY_API_KEY", ""),
            daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
            aiohttp_session=aiohttp_session,
        )
        yield
    finally:
        # Run shutdown even when an exception is raised at the yield point or
        # while building the helper, so the session and bots are not leaked.
        try:
            await aiohttp_session.close()
        finally:
            cleanup()
|
||||
|
||||
|
||||
# Initialize FastAPI app with lifespan manager
|
||||
app = FastAPI(lifespan=lifespan)

# CORS is left wide open: any origin, method and header, credentials allowed.
_cors_options = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)
|
||||
|
||||
|
||||
async def create_room_and_token() -> tuple[str, str]:
    """Create a Daily room and an access token for it.

    Returns:
        tuple[str, str]: ``(room_url, token)``

    Raises:
        HTTPException: 500 if the created room has no URL or no token is
            returned for it.
    """
    rest = daily_helpers["rest"]

    room = await rest.create_room(DailyRoomParams())
    if not room.url:
        raise HTTPException(status_code=500, detail="Failed to create room")

    token = await rest.get_token(room.url)
    if token:
        return room.url, token

    raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")
|
||||
|
||||
|
||||
@app.get("/")
async def start_agent(request: Request):
    """Endpoint for direct browser access to the bot.

    Creates a room, starts a bot process for it, and redirects the browser to
    the Daily room URL.

    Args:
        request: The incoming request (unused).

    Returns:
        RedirectResponse: Redirects to the Daily room URL

    Raises:
        HTTPException: If room creation, token generation, or bot startup
            fails, or if the room already has ``MAX_BOTS_PER_ROOM`` live bots
    """
    print("Creating room")
    room_url, token = await create_room_and_token()
    print(f"Room URL: {room_url}")

    # Count tracked processes for this room that have not exited yet.
    num_bots_in_room = sum(
        1 for proc in bot_procs.values() if proc[1] == room_url and proc[0].poll() is None
    )
    if num_bots_in_room >= MAX_BOTS_PER_ROOM:
        raise HTTPException(status_code=500, detail=f"Max bot limit reached for room: {room_url}")

    # Spawn a new bot process. The arguments go in as an argv list with no
    # shell, so the URL and token are never interpreted as shell syntax.
    try:
        proc = subprocess.Popen(
            ["python3", "bot.py", "-u", room_url, "-t", token],
            bufsize=1,
            cwd=os.path.dirname(os.path.abspath(__file__)),
        )
        bot_procs[proc.pid] = (proc, room_url)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")

    return RedirectResponse(room_url)
|
||||
|
||||
|
||||
@app.post("/connect")
async def rtvi_connect(request: Request) -> Dict[Any, Any]:
    """RTVI connect endpoint that creates a room and returns connection credentials.

    Creates a room and token, starts a bot process for the room, and returns
    the credentials to the caller.

    Args:
        request: The incoming request (unused).

    Returns:
        Dict[Any, Any]: Authentication bundle containing room_url and token

    Raises:
        HTTPException: If room creation, token generation, or bot startup fails
    """
    print("Creating room for RTVI connection")
    room_url, token = await create_room_and_token()
    print(f"Room URL: {room_url}")

    # Start the bot process. The arguments go in as an argv list with no
    # shell, so the URL and token are never interpreted as shell syntax.
    try:
        proc = subprocess.Popen(
            ["python3", "-m", "bot", "-u", room_url, "-t", token],
            bufsize=1,
            cwd=os.path.dirname(os.path.abspath(__file__)),
        )
        bot_procs[proc.pid] = (proc, room_url)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")

    # Return the authentication bundle in format expected by DailyTransport
    return {"room_url": room_url, "token": token}
|
||||
|
||||
|
||||
@app.get("/status/{pid}")
def get_status(pid: int):
    """Report whether a tracked bot process is still running.

    Args:
        pid (int): Process ID of the bot

    Returns:
        JSONResponse: ``{"bot_id": pid, "status": "running" | "finished"}``

    Raises:
        HTTPException: 404 if no bot with this process ID is tracked
    """
    entry = bot_procs.get(pid)
    if not entry:
        raise HTTPException(status_code=404, detail=f"Bot with process id: {pid} not found")

    # poll() returns None while the process has not exited.
    still_running = entry[0].poll() is None
    if still_running:
        status = "running"
    else:
        status = "finished"

    return JSONResponse({"bot_id": pid, "status": status})
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import uvicorn

    # Environment variables supply the defaults; command line flags override.
    default_host = os.getenv("HOST", "0.0.0.0")
    default_port = int(os.getenv("FAST_API_PORT", "7860"))

    parser = argparse.ArgumentParser(description="Daily Storyteller FastAPI server")
    for flag, options in (
        ("--host", {"type": str, "default": default_host, "help": "Host address"}),
        ("--port", {"type": int, "default": default_port, "help": "Port number"}),
        ("--reload", {"action": "store_true", "help": "Reload code on change"}),
    ):
        parser.add_argument(flag, **options)

    config = parser.parse_args()

    # Start the FastAPI server
    uvicorn.run("server:app", host=config.host, port=config.port, reload=config.reload)
|
||||
@@ -4,54 +4,55 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import EndFrame, TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.piper.tts import PiperTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.network.small_webrtc import SmallWebRTCTransport
|
||||
from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
async def main():
|
||||
# Create a transport using the WebRTC connection
|
||||
transport = SmallWebRTCTransport(
|
||||
webrtc_connection=webrtc_connection,
|
||||
params=TransportParams(
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
)
|
||||
|
||||
# Create an HTTP session
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Say One Thing", DailyParams(audio_out_enabled=True)
|
||||
)
|
||||
|
||||
tts = PiperTTSService(
|
||||
base_url=os.getenv("PIPER_BASE_URL"), aiohttp_session=session, sample_rate=24000
|
||||
)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(Pipeline([tts, transport.output()]))
|
||||
|
||||
# Register an event handler so we can play the audio when the
|
||||
# participant joins.
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await task.queue_frames(
|
||||
[TTSSpeakFrame(f"Hello there, how are you today ?"), EndFrame()]
|
||||
)
|
||||
# Register an event handler so we can play the audio when the client joins
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
await task.queue_frames([TTSSpeakFrame(f"Hello there!"), EndFrame()])
|
||||
|
||||
runner = PipelineRunner(handle_sigint=False)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
from run import main
|
||||
|
||||
main()
|
||||
|
||||
60
examples/foundational/01-say-one-thing-rime.py
Normal file
@@ -0,0 +1,60 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import EndFrame, TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.rime.tts import RimeHttpTTSService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.network.small_webrtc import SmallWebRTCTransport
|
||||
from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace):
    """Speak one greeting to a WebRTC client using the Rime HTTP TTS service.

    Builds a two-stage pipeline (TTS -> transport output), queues a greeting
    followed by an ``EndFrame`` once a client connects, and then runs the
    pipeline task.

    Args:
        webrtc_connection: Connection handed to ``SmallWebRTCTransport``.
        _: Parsed command-line arguments; not used by this example.
    """
    logger.info("Starting bot")

    # Create a transport using the WebRTC connection. Only audio output is
    # enabled; the pipeline below has no input stage.
    transport = SmallWebRTCTransport(
        webrtc_connection=webrtc_connection,
        params=TransportParams(
            audio_out_enabled=True,
        ),
    )

    # The TTS service is given this HTTP session, so the pipeline is built and
    # run inside the ``async with`` block to keep the session open meanwhile.
    async with aiohttp.ClientSession() as session:
        tts = RimeHttpTTSService(
            # Falls back to an empty key when RIME_API_KEY is not set.
            api_key=os.getenv("RIME_API_KEY", ""),
            voice_id="rex",
            aiohttp_session=session,
        )

        task = PipelineTask(Pipeline([tts, transport.output()]))

        # Register an event handler so we can play the audio when the client joins
        @transport.event_handler("on_client_connected")
        async def on_client_connected(transport, client):
            await task.queue_frames([TTSSpeakFrame("Hello there!"), EndFrame()])

        runner = PipelineRunner(handle_sigint=False)

        await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Imported only when executed as a script. `run` is presumably the
    # examples' shared launcher module — confirm it is importable from here.
    from run import main

    main()
|
||||
@@ -4,56 +4,53 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import EndFrame, TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.network.small_webrtc import SmallWebRTCTransport
|
||||
from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
# Create a transport using the WebRTC connection
|
||||
transport = SmallWebRTCTransport(
|
||||
webrtc_connection=webrtc_connection,
|
||||
params=TransportParams(
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Say One Thing", DailyParams(audio_out_enabled=True)
|
||||
)
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
task = PipelineTask(Pipeline([tts, transport.output()]))
|
||||
|
||||
runner = PipelineRunner()
|
||||
# Register an event handler so we can play the audio when the client joins
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
await task.queue_frames([TTSSpeakFrame(f"Hello there!"), EndFrame()])
|
||||
|
||||
task = PipelineTask(Pipeline([tts, transport.output()]))
|
||||
runner = PipelineRunner(handle_sigint=False)
|
||||
|
||||
# Register an event handler so we can play the audio when the
|
||||
# participant joins.
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
participant_name = participant.get("info", {}).get("userName", "")
|
||||
await task.queue_frames(
|
||||
[TTSSpeakFrame(f"Hello there, {participant_name}!"), EndFrame()]
|
||||
)
|
||||
|
||||
await runner.run(task)
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
from run import main
|
||||
|
||||
main()
|
||||
|
||||
@@ -4,51 +4,50 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import EndFrame, TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.riva.tts import FastPitchTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.network.small_webrtc import SmallWebRTCTransport
|
||||
from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
# Create a transport using the WebRTC connection
|
||||
transport = SmallWebRTCTransport(
|
||||
webrtc_connection=webrtc_connection,
|
||||
params=TransportParams(
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Say One Thing", DailyParams(audio_out_enabled=True)
|
||||
)
|
||||
tts = FastPitchTTSService(api_key=os.getenv("NVIDIA_API_KEY"))
|
||||
|
||||
tts = FastPitchTTSService(api_key=os.getenv("NVIDIA_API_KEY"))
|
||||
task = PipelineTask(Pipeline([tts, transport.output()]))
|
||||
|
||||
runner = PipelineRunner()
|
||||
# Register an event handler so we can play the audio when the client joins
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
await task.queue_frames([TTSSpeakFrame(f"Hello there!"), EndFrame()])
|
||||
|
||||
task = PipelineTask(Pipeline([tts, transport.output()]))
|
||||
runner = PipelineRunner(handle_sigint=False)
|
||||
|
||||
# Register an event handler so we can play the audio when the
|
||||
# participant joins.
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
participant_name = participant.get("info", {}).get("userName", "")
|
||||
await task.queue_frames([TTSSpeakFrame(f"Aloha, {participant_name}!"), EndFrame()])
|
||||
|
||||
await runner.run(task)
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
from run import main
|
||||
|
||||
main()
|
||||
|
||||
@@ -4,14 +4,11 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import EndFrame, LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -19,46 +16,51 @@ from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.network.small_webrtc import SmallWebRTCTransport
|
||||
from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
# Create a transport using the WebRTC connection
|
||||
transport = SmallWebRTCTransport(
|
||||
webrtc_connection=webrtc_connection,
|
||||
params=TransportParams(
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Say One Thing From an LLM", DailyParams(audio_out_enabled=True)
|
||||
)
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are an LLM in a WebRTC session, and this is a 'hello world' demo. Say hello to the world.",
|
||||
}
|
||||
]
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are an LLM in a WebRTC session, and this is a 'hello world' demo. Say hello to the world.",
|
||||
}
|
||||
]
|
||||
task = PipelineTask(Pipeline([llm, tts, transport.output()]))
|
||||
|
||||
runner = PipelineRunner()
|
||||
# Register an event handler so we can play the audio when the client joins
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
await task.queue_frames([LLMMessagesFrame(messages), EndFrame()])
|
||||
|
||||
task = PipelineTask(Pipeline([llm, tts, transport.output()]))
|
||||
runner = PipelineRunner(handle_sigint=False)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await task.queue_frames([LLMMessagesFrame(messages), EndFrame()])
|
||||
|
||||
await runner.run(task)
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
from run import main
|
||||
|
||||
main()
|
||||
|
||||
@@ -4,59 +4,68 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.fal.image import FalImageGenService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.network.small_webrtc import SmallWebRTCTransport
|
||||
from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
async def main():
|
||||
# Create a transport using the WebRTC connection
|
||||
transport = SmallWebRTCTransport(
|
||||
webrtc_connection=webrtc_connection,
|
||||
params=TransportParams(
|
||||
video_out_enabled=True,
|
||||
video_out_width=1024,
|
||||
video_out_height=1024,
|
||||
),
|
||||
)
|
||||
|
||||
# Create an HTTP session
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
"Show a still frame image",
|
||||
DailyParams(camera_out_enabled=True, camera_out_width=1024, camera_out_height=1024),
|
||||
)
|
||||
|
||||
imagegen = FalImageGenService(
|
||||
params=FalImageGenService.InputParams(image_size="square_hd"),
|
||||
aiohttp_session=session,
|
||||
key=os.getenv("FAL_KEY"),
|
||||
)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(Pipeline([imagegen, transport.output()]))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
# Register an event handler so we can play the audio when the client joins
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
await task.queue_frame(TextFrame("a cat in the style of picasso"))
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
|
||||
@transport.event_handler("on_client_closed")
|
||||
async def on_client_closed(transport, client):
|
||||
logger.info(f"Client closed connection")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=False)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
from run import main
|
||||
|
||||
main()
|
||||
|
||||
@@ -33,9 +33,7 @@ async def main():
|
||||
|
||||
transport = TkLocalTransport(
|
||||
tk_root,
|
||||
TkTransportParams(
|
||||
camera_out_enabled=True, camera_out_width=1024, camera_out_height=1024
|
||||
),
|
||||
TkTransportParams(video_out_enabled=True, video_out_width=1024, video_out_height=1024),
|
||||
)
|
||||
|
||||
imagegen = FalImageGenService(
|
||||
|
||||
@@ -4,62 +4,68 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import EndFrame, TextFrame
|
||||
from pipecat.frames.frames import TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.services.google.image import GoogleImageGenService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.network.small_webrtc import SmallWebRTCTransport
|
||||
from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
# Create a transport using the WebRTC connection
|
||||
transport = SmallWebRTCTransport(
|
||||
webrtc_connection=webrtc_connection,
|
||||
params=TransportParams(
|
||||
video_out_enabled=True,
|
||||
video_out_width=1024,
|
||||
video_out_height=1024,
|
||||
),
|
||||
)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
"Show a still frame image",
|
||||
DailyParams(camera_out_enabled=True, camera_out_width=1024, camera_out_height=1024),
|
||||
)
|
||||
imagegen = GoogleImageGenService(
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
)
|
||||
|
||||
imagegen = GoogleImageGenService(
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
)
|
||||
task = PipelineTask(
|
||||
Pipeline([imagegen, transport.output()]),
|
||||
params=PipelineParams(enable_metrics=True),
|
||||
)
|
||||
|
||||
runner = PipelineRunner()
|
||||
# Register an event handler so we can play the audio when the client joins
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
await task.queue_frame(TextFrame("a cat in the style of picasso"))
|
||||
await task.queue_frame(TextFrame("a dog in the style of picasso"))
|
||||
await task.queue_frame(TextFrame("a fish in the style of picasso"))
|
||||
|
||||
task = PipelineTask(
|
||||
Pipeline([imagegen, transport.output()]),
|
||||
params=PipelineParams(enable_metrics=True),
|
||||
)
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await task.queue_frame(TextFrame("a cat in the style of picasso"))
|
||||
await task.queue_frame(TextFrame("a dog in the style of picasso"))
|
||||
await task.queue_frame(TextFrame("a fish in the style of picasso"))
|
||||
@transport.event_handler("on_client_closed")
|
||||
async def on_client_closed(transport, client):
|
||||
logger.info(f"Client closed connection")
|
||||
await task.cancel()
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
runner = PipelineRunner(handle_sigint=False)
|
||||
|
||||
await runner.run(task)
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
from run import main
|
||||
|
||||
main()
|
||||
|
||||
105
examples/foundational/04-transports-small-webrtc.py
Normal file
@@ -0,0 +1,105 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.network.small_webrtc import SmallWebRTCTransport
|
||||
from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace):
    """Run a speech-to-speech assistant over a small WebRTC connection.

    Wires transport input -> Deepgram STT -> user context aggregation ->
    OpenAI LLM -> Cartesia TTS -> transport output -> assistant context
    aggregation, then runs the pipeline task. The task is cancelled when the
    client closes the connection.

    Args:
        webrtc_connection: Connection handed to ``SmallWebRTCTransport``.
        _: Parsed command-line arguments; not used by this example.
    """
    logger.info("Starting bot")

    # Audio is enabled in both directions, with Silero VAD on the transport.
    transport = SmallWebRTCTransport(
        webrtc_connection=webrtc_connection,
        params=TransportParams(
            audio_in_enabled=True,
            audio_out_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
        ),
    )

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )

    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

    # Initial system prompt. The same list object is passed to the context
    # below and is appended to when a client connects.
    messages = [
        {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
        },
    ]

    context = OpenAILLMContext(messages)
    context_aggregator = llm.create_context_aggregator(context)

    pipeline = Pipeline(
        [
            transport.input(),  # Transport user input
            stt,
            context_aggregator.user(),  # User responses
            llm,  # LLM
            tts,  # TTS
            transport.output(),  # Transport bot output
            context_aggregator.assistant(),  # Assistant spoken responses
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            allow_interruptions=True,
            enable_metrics=True,
            enable_usage_metrics=True,
            report_only_initial_ttfb=True,
        ),
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Kick off the conversation.
        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
        await task.queue_frames([context_aggregator.user().get_context_frame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        # Only logged; the task is cancelled in the "closed" handler below.
        logger.info("Client disconnected")

    @transport.event_handler("on_client_closed")
    async def on_client_closed(transport, client):
        logger.info("Client closed connection")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=False)

    await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Imported only when executed as a script. `run` is presumably the
    # examples' shared launcher module — confirm it is importable from here.
    from run import main

    main()
|
||||