Compare commits
481 Commits
v0.0.31
...
khk/aggreg
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8f2941c575 | ||
|
|
2703813e8a | ||
|
|
521e152150 | ||
|
|
3d43ad0f4d | ||
|
|
3621fceae2 | ||
|
|
e123f33c03 | ||
|
|
b8713666c2 | ||
|
|
cf0ab85e2c | ||
|
|
8502c7c801 | ||
|
|
e89814dc6b | ||
|
|
9461bacf0d | ||
|
|
e276dcbab7 | ||
|
|
1a3de0e819 | ||
|
|
ee3786fe15 | ||
|
|
31b5667cee | ||
|
|
a483f1a083 | ||
|
|
2ecec1c9f8 | ||
|
|
08ac311971 | ||
|
|
cb49b6a0d6 | ||
|
|
016da177db | ||
|
|
b1e17ee347 | ||
|
|
8ee9621d66 | ||
|
|
8edee8155d | ||
|
|
c262b272fa | ||
|
|
9ef9c1c58a | ||
|
|
c7ff79a652 | ||
|
|
da81df5284 | ||
|
|
a4420dc88b | ||
|
|
eeb8338dce | ||
|
|
dfa4ac81fd | ||
|
|
ea16dca8aa | ||
|
|
306632b29a | ||
|
|
9a4e749c7c | ||
|
|
55c645c614 | ||
|
|
a1024bb365 | ||
|
|
dfc82c3ba4 | ||
|
|
9e27a8aad0 | ||
|
|
c73111afea | ||
|
|
26a64afd8d | ||
|
|
78a3f081de | ||
|
|
e8f8a49646 | ||
|
|
219304c5ee | ||
|
|
f3fd312b83 | ||
|
|
357e66d64d | ||
|
|
4fa1ea8c4b | ||
|
|
3b81cd462d | ||
|
|
14acf05a26 | ||
|
|
58d9c84bc9 | ||
|
|
7e39d9ad3d | ||
|
|
a4edb3dab1 | ||
|
|
ed409d0460 | ||
|
|
50b45ac2da | ||
|
|
29bcbc68c5 | ||
|
|
affbe9ac7d | ||
|
|
1790fa452f | ||
|
|
607a246572 | ||
|
|
4f1b06e6b2 | ||
|
|
62e9a33a70 | ||
|
|
3298f935ef | ||
|
|
0e8f56c752 | ||
|
|
8224538372 | ||
|
|
fbf6eef68f | ||
|
|
f078d156de | ||
|
|
23d6eed5ea | ||
|
|
0ed3d118d6 | ||
|
|
337f048864 | ||
|
|
6f3c421621 | ||
|
|
eadd68d40b | ||
|
|
71202e3cd5 | ||
|
|
13a4a05388 | ||
|
|
20c019ae16 | ||
|
|
d9d6571c73 | ||
|
|
540cad4844 | ||
|
|
0a26b650c0 | ||
|
|
adaac003e5 | ||
|
|
3d4f125071 | ||
|
|
bce87f8717 | ||
|
|
1fe940bd6b | ||
|
|
cb36a71381 | ||
|
|
5acc4928fe | ||
|
|
434493b8aa | ||
|
|
f08b25dbb2 | ||
|
|
3665734972 | ||
|
|
a98d78cdea | ||
|
|
80f6d74e80 | ||
|
|
02d926e9bd | ||
|
|
7749692f72 | ||
|
|
7807cbeb39 | ||
|
|
72f231b327 | ||
|
|
3cbe97d346 | ||
|
|
b880e1a60e | ||
|
|
886046e696 | ||
|
|
9106a5f8ae | ||
|
|
98286336bf | ||
|
|
081b001c8b | ||
|
|
c92531a02f | ||
|
|
748a7af602 | ||
|
|
f4a0de6327 | ||
|
|
e405d7af9f | ||
|
|
51cd7fd285 | ||
|
|
aba5f89174 | ||
|
|
5c0f5a1613 | ||
|
|
7c342f7ba2 | ||
|
|
37e2388758 | ||
|
|
05f0492a8d | ||
|
|
c0ac5c6ae8 | ||
|
|
be923687fb | ||
|
|
5f32fb125d | ||
|
|
ae6fbb3146 | ||
|
|
864768635a | ||
|
|
d7c9679977 | ||
|
|
fedfc366f6 | ||
|
|
b3b39626e1 | ||
|
|
4e0ece17b6 | ||
|
|
fd3fdacdee | ||
|
|
a253606d50 | ||
|
|
568d9dc0a3 | ||
|
|
6629b853c5 | ||
|
|
3931cb3235 | ||
|
|
38cd86ad52 | ||
|
|
c0cdabf61d | ||
|
|
51270a96c5 | ||
|
|
84d72c0d5c | ||
|
|
79aca8169a | ||
|
|
b9d362bd62 | ||
|
|
87c4a1bee1 | ||
|
|
c979762b70 | ||
|
|
1d92fc3199 | ||
|
|
8ac7fb1a67 | ||
|
|
60c3d33def | ||
|
|
8a39d3f4eb | ||
|
|
e038767b6f | ||
|
|
0c46b3e481 | ||
|
|
d42f072ff5 | ||
|
|
9b6f29c24a | ||
|
|
873d5dc23f | ||
|
|
6d141fd47f | ||
|
|
c6f6cb2947 | ||
|
|
0eb189ce7f | ||
|
|
f4fd7b7028 | ||
|
|
21de8e0a35 | ||
|
|
6f55d494bd | ||
|
|
d216edc567 | ||
|
|
ec6063ecc4 | ||
|
|
40fe4ce6fb | ||
|
|
31d87a4048 | ||
|
|
ac8b171fa9 | ||
|
|
1f06d78213 | ||
|
|
28eba17df8 | ||
|
|
dfc2e62339 | ||
|
|
80c89a39c9 | ||
|
|
9d1c16e996 | ||
|
|
86604c2353 | ||
|
|
8f31a02938 | ||
|
|
47d375309d | ||
|
|
980265ca97 | ||
|
|
90479fff95 | ||
|
|
1ce1fcb0ce | ||
|
|
1a662376fc | ||
|
|
1d24f926ec | ||
|
|
4f2c37c940 | ||
|
|
042115a6bb | ||
|
|
c9f1469b41 | ||
|
|
54c9f604c9 | ||
|
|
56fbcd6562 | ||
|
|
e6b0500568 | ||
|
|
41038b6673 | ||
|
|
26d03f26c9 | ||
|
|
f3a4e54996 | ||
|
|
925e80bb20 | ||
|
|
9bda09b1a8 | ||
|
|
ef0d0531fa | ||
|
|
6520f20ffe | ||
|
|
ebc4e0924b | ||
|
|
9e7c0e6033 | ||
|
|
cf5720f316 | ||
|
|
655b468269 | ||
|
|
17f8c93e44 | ||
|
|
5b4061b0d5 | ||
|
|
6ce0227e98 | ||
|
|
a583a28850 | ||
|
|
32daf65adc | ||
|
|
e22c80610e | ||
|
|
374f1e7e01 | ||
|
|
d2dfa93bf1 | ||
|
|
fa8c6712c6 | ||
|
|
4c2b84cb4d | ||
|
|
b57c9d569b | ||
|
|
f0e50ba000 | ||
|
|
4a6638f749 | ||
|
|
31577252f3 | ||
|
|
5d71c50080 | ||
|
|
981269d594 | ||
|
|
848db985fc | ||
|
|
d5d8e31447 | ||
|
|
66670a2370 | ||
|
|
5637f349c6 | ||
|
|
93248e1d00 | ||
|
|
187769357f | ||
|
|
5be6422cc8 | ||
|
|
8670b2d994 | ||
|
|
0bc6db428d | ||
|
|
67d565930e | ||
|
|
b2a7ff6fd3 | ||
|
|
425a730d7c | ||
|
|
84c5709722 | ||
|
|
94deec01c9 | ||
|
|
6e0dd4a779 | ||
|
|
14bde340dd | ||
|
|
253765c611 | ||
|
|
2b26d7182f | ||
|
|
61ac83e2d9 | ||
|
|
d5c7b28cad | ||
|
|
959580a708 | ||
|
|
3a5cd17ea3 | ||
|
|
b78981bb9d | ||
|
|
a6d90b0a00 | ||
|
|
67016492f2 | ||
|
|
2c38089527 | ||
|
|
48f68ba6dc | ||
|
|
574df4ba3d | ||
|
|
49ca16d125 | ||
|
|
87525b085e | ||
|
|
6b53c6add3 | ||
|
|
29ca1b7855 | ||
|
|
a42d0c9907 | ||
|
|
8bc6ceaa3d | ||
|
|
0b8a1ab5d1 | ||
|
|
358c287db2 | ||
|
|
2e68453655 | ||
|
|
89b8a9de7d | ||
|
|
c4c2058df9 | ||
|
|
0d85c0085f | ||
|
|
6fa8a8f84f | ||
|
|
a97775bff3 | ||
|
|
32640e054d | ||
|
|
aa42da5658 | ||
|
|
900a94a825 | ||
|
|
c37552de70 | ||
|
|
916b37926c | ||
|
|
2b76c3c15a | ||
|
|
cedd7dde18 | ||
|
|
d088608d8e | ||
|
|
06ee29bb8b | ||
|
|
d255e954d6 | ||
|
|
6a7ab6b8ac | ||
|
|
45b18cc0b1 | ||
|
|
0479431f0a | ||
|
|
ec58dbd791 | ||
|
|
91de68aab3 | ||
|
|
85efc30145 | ||
|
|
0032594f21 | ||
|
|
829fdc5679 | ||
|
|
22e176e329 | ||
|
|
826a70a137 | ||
|
|
dd0ea674af | ||
|
|
a4761b8921 | ||
|
|
3958bb7903 | ||
|
|
83a037a7ce | ||
|
|
a3eb8337a6 | ||
|
|
541072f8e0 | ||
|
|
881248cbd6 | ||
|
|
d4979f5e64 | ||
|
|
4133cd03bb | ||
|
|
9f07c3ca27 | ||
|
|
b20bacb9ed | ||
|
|
97cfbfee1d | ||
|
|
fa7c941792 | ||
|
|
4738879f32 | ||
|
|
d5d88f756a | ||
|
|
65b136bf15 | ||
|
|
bee0b238e4 | ||
|
|
c891168ffb | ||
|
|
6376c2f6aa | ||
|
|
4d9b7cdd61 | ||
|
|
8263d1dd6f | ||
|
|
faf41c0b36 | ||
|
|
27a09c0b2c | ||
|
|
3db7f6a284 | ||
|
|
3bfeb5b5ef | ||
|
|
62a7a555b5 | ||
|
|
d60e99a043 | ||
|
|
77723b34c7 | ||
|
|
c466d34a06 | ||
|
|
f816897833 | ||
|
|
c1e8a5e522 | ||
|
|
76aca32f2e | ||
|
|
7e31b2a795 | ||
|
|
028e38a86b | ||
|
|
8cf7649855 | ||
|
|
64f5119b08 | ||
|
|
4d606aefb3 | ||
|
|
4bafdaa04d | ||
|
|
5afe1abf82 | ||
|
|
f066d50b98 | ||
|
|
91103e21cc | ||
|
|
f44dabcd65 | ||
|
|
0fd2fca231 | ||
|
|
5bb64098e7 | ||
|
|
3fc85e75e0 | ||
|
|
3f61ea16b7 | ||
|
|
4b393092b5 | ||
|
|
b583f5162b | ||
|
|
060a22f395 | ||
|
|
d3e85355f1 | ||
|
|
83e730b768 | ||
|
|
5fcc96446c | ||
|
|
ad88925154 | ||
|
|
0a6ddbf15c | ||
|
|
08e0722d97 | ||
|
|
05d4fba551 | ||
|
|
f41c2b3c9f | ||
|
|
69f64899fe | ||
|
|
33f0865430 | ||
|
|
ad5b9202ab | ||
|
|
1676693091 | ||
|
|
0852b50b8f | ||
|
|
eb998aa502 | ||
|
|
6dab0e9de7 | ||
|
|
95ff1d141c | ||
|
|
87bc8a9da6 | ||
|
|
087fe9a537 | ||
|
|
c1170260b5 | ||
|
|
65cdf50774 | ||
|
|
9233bb490c | ||
|
|
43932220f7 | ||
|
|
cea4d1894e | ||
|
|
80baa0358d | ||
|
|
5d73db53a0 | ||
|
|
302ea90dce | ||
|
|
37b04ed283 | ||
|
|
be6995cfdf | ||
|
|
dfbc11300c | ||
|
|
82d539d174 | ||
|
|
6e00f31014 | ||
|
|
a46ac3cc92 | ||
|
|
6fbf98d8e2 | ||
|
|
f094c42728 | ||
|
|
13827e1282 | ||
|
|
32170b47d9 | ||
|
|
09c05354c2 | ||
|
|
b0b1475563 | ||
|
|
b85dd7283a | ||
|
|
846ae765e5 | ||
|
|
4c629e538e | ||
|
|
f6e22bb3b9 | ||
|
|
46a048d7f6 | ||
|
|
bd9f4eea06 | ||
|
|
0a672e61e2 | ||
|
|
29a8530221 | ||
|
|
3e738642a7 | ||
|
|
f551f55f03 | ||
|
|
9f012c8002 | ||
|
|
0a69a9e5ef | ||
|
|
194790183a | ||
|
|
2227721173 | ||
|
|
77a53da5f5 | ||
|
|
ab63ff275d | ||
|
|
e5363f65f0 | ||
|
|
ffc157de65 | ||
|
|
f9fdadb4c0 | ||
|
|
4efccb79f2 | ||
|
|
337968199a | ||
|
|
37027f68cb | ||
|
|
d1b62c5495 | ||
|
|
355fe01cb7 | ||
|
|
9d050a16c7 | ||
|
|
fa53c67606 | ||
|
|
5006376fe6 | ||
|
|
2204b8e205 | ||
|
|
270007b17c | ||
|
|
568eb2ef4c | ||
|
|
73ca9184a8 | ||
|
|
5e8e11e16e | ||
|
|
029bbc16f2 | ||
|
|
9e3d87e4f6 | ||
|
|
f1410a1127 | ||
|
|
2b980d16c3 | ||
|
|
b2b97aafb8 | ||
|
|
da2082b025 | ||
|
|
327ea9d547 | ||
|
|
b23db4a202 | ||
|
|
d1a36004ab | ||
|
|
6071920c45 | ||
|
|
5f539e1fba | ||
|
|
8e1539c360 | ||
|
|
065cfb2aca | ||
|
|
3147534e86 | ||
|
|
be5603bf16 | ||
|
|
b9b0bcdcbd | ||
|
|
5bcece56f3 | ||
|
|
d67faef88c | ||
|
|
8f6db5e905 | ||
|
|
82e93a0560 | ||
|
|
a9a82c083b | ||
|
|
974d9c33ed | ||
|
|
c1957ab694 | ||
|
|
b20a10a4bc | ||
|
|
be14ce465d | ||
|
|
d1ca0c5614 | ||
|
|
535514f506 | ||
|
|
933b63cf13 | ||
|
|
d7c3e380a5 | ||
|
|
c5298f78cb | ||
|
|
4f8f7b8d1d | ||
|
|
d7d46919ac | ||
|
|
e5d73d2e2e | ||
|
|
b145e8ec90 | ||
|
|
97ff4a1fb8 | ||
|
|
5018a552c1 | ||
|
|
7f9fd9ffce | ||
|
|
ddd0ca6a8f | ||
|
|
06f817c7e3 | ||
|
|
df4c3e56c4 | ||
|
|
9d5c2b9656 | ||
|
|
7ce59c5e2e | ||
|
|
1c9631fc78 | ||
|
|
efbe7297f7 | ||
|
|
1b45946a61 | ||
|
|
cbf5a6362c | ||
|
|
583b96c341 | ||
|
|
fc0920504d | ||
|
|
abd65a93b2 | ||
|
|
c3244fdd7a | ||
|
|
e8f58938b0 | ||
|
|
602b4f34b1 | ||
|
|
0399c84dfa | ||
|
|
fd5d879bf5 | ||
|
|
8dff460307 | ||
|
|
cce1ddb183 | ||
|
|
8691d14289 | ||
|
|
dd402da9e5 | ||
|
|
2fd04248f1 | ||
|
|
0ac42006f8 | ||
|
|
66e331248d | ||
|
|
4be3e8c87d | ||
|
|
dac033fe61 | ||
|
|
d302cbb114 | ||
|
|
e3b407db28 | ||
|
|
4ef623f09e | ||
|
|
253530a63d | ||
|
|
4f38d989f5 | ||
|
|
84074e90ee | ||
|
|
38aee7d8f2 | ||
|
|
64198313c6 | ||
|
|
d61b6c301c | ||
|
|
83d1931266 | ||
|
|
c31f2ab285 | ||
|
|
0ddc5721b4 | ||
|
|
98bd183bc4 | ||
|
|
aaa154524c | ||
|
|
beced68337 | ||
|
|
94823ab952 | ||
|
|
0b6a19802f | ||
|
|
c4a2d2197c | ||
|
|
269d06aa15 | ||
|
|
dfef1f2c54 | ||
|
|
b62beaba0b | ||
|
|
adf414e40f | ||
|
|
dc64e57f63 | ||
|
|
d3e410b2ac | ||
|
|
c544b2474b | ||
|
|
18243de358 | ||
|
|
6625895d1f | ||
|
|
f9ecce739e | ||
|
|
0075dd8386 | ||
|
|
eef1cde816 | ||
|
|
8d867c30c6 | ||
|
|
42c668b7ae | ||
|
|
b62227b4ae | ||
|
|
25ef0cb87b | ||
|
|
e195941aa5 | ||
|
|
e09eef1dd7 | ||
|
|
7c13663a4e | ||
|
|
5753869e5e | ||
|
|
ba878a19f4 | ||
|
|
55a9de78cd | ||
|
|
ff51fc9091 | ||
|
|
a4f857ee34 | ||
|
|
3250d74bef | ||
|
|
c086160239 |
@@ -1,4 +1,4 @@
|
||||
name: lint
|
||||
name: format
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -12,12 +12,12 @@ on:
|
||||
- "docs/**"
|
||||
|
||||
concurrency:
|
||||
group: build-lint-${{ github.event.pull_request.number || github.ref }}
|
||||
group: build-format-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
autopep8:
|
||||
name: "Formatting lints"
|
||||
ruff-format:
|
||||
name: "Formatting checker"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
@@ -25,7 +25,7 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.10'
|
||||
python-version: "3.10"
|
||||
- name: Setup virtual environment
|
||||
run: |
|
||||
python -m venv .venv
|
||||
@@ -34,11 +34,8 @@ jobs:
|
||||
source .venv/bin/activate
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r dev-requirements.txt
|
||||
- name: autopep8
|
||||
id: autopep8
|
||||
- name: Ruff formatter
|
||||
id: ruff
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
autopep8 --max-line-length 100 --exit-code -r -d --exclude "*_pb2.py" -a -a src/
|
||||
- name: Fail if autopep8 requires changes
|
||||
if: steps.autopep8.outputs.exit-code == 2
|
||||
run: exit 1
|
||||
ruff format --config line-length=100 --diff --exclude "*_pb2.py"
|
||||
7
.github/workflows/publish_test.yaml
vendored
7
.github/workflows/publish_test.yaml
vendored
@@ -1,10 +1,6 @@
|
||||
name: publish-test
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
on: workflow_dispatch
|
||||
|
||||
jobs:
|
||||
build:
|
||||
@@ -14,7 +10,6 @@ jobs:
|
||||
- name: Checkout repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ github.event.inputs.gitref }}
|
||||
fetch-tags: true
|
||||
fetch-depth: 100
|
||||
- name: Set up Python
|
||||
|
||||
20
.github/workflows/tests.yaml
vendored
20
.github/workflows/tests.yaml
vendored
@@ -20,21 +20,17 @@ jobs:
|
||||
name: "Unit and Integration Tests"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Checkout repo
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up Python
|
||||
id: setup_python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.10'
|
||||
- name: Cache virtual environment
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
# We are hashing requirements-dev.txt and requirements-extra.txt which
|
||||
# contain all dependencies needed to run the tests and examples.
|
||||
key: venv-${{ runner.os }}-${{ steps.setup_python.outputs.python-version}}-${{ hashFiles('linux-py3.10-requirements.txt') }}-${{ hashFiles('dev-requirements.txt') }}
|
||||
path: .venv
|
||||
python-version: "3.10"
|
||||
- name: Install system packages
|
||||
run: sudo apt-get install -y portaudio19-dev
|
||||
id: install_system_packages
|
||||
run: |
|
||||
sudo apt-get install -y portaudio19-dev
|
||||
- name: Setup virtual environment
|
||||
run: |
|
||||
python -m venv .venv
|
||||
@@ -42,8 +38,8 @@ jobs:
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r linux-py3.10-requirements.txt -r dev-requirements.txt
|
||||
pip install -r dev-requirements.txt -r test-requirements.txt
|
||||
- name: Test with pytest
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
pytest --doctest-modules --ignore-glob="*to_be_updated*" src tests
|
||||
pytest --ignore-glob="*to_be_updated*" --ignore-glob=*pipeline_source* src tests
|
||||
|
||||
466
CHANGELOG.md
466
CHANGELOG.md
@@ -5,6 +5,470 @@ All notable changes to **pipecat** will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
|
||||
- Added `AsyncGeneratorProcessor`. This processor can be used together with a
|
||||
`FrameSerializer` as an async generator. It provides a `generator()` function
|
||||
that returns an `AsyncGenerator` and that yields serialized frames.
|
||||
|
||||
- Added `EndTaskFrame` and `CancelTaskFrame`. These are new frames that are
|
||||
meant to be pushed upstream to tell the pipeline task to stop nicely or
|
||||
immediately respectively.
|
||||
|
||||
- Added configurable LLM parameters (e.g., temperature, top_p, max_tokens, seed)
|
||||
for OpenAI, Anthropic, and Together AI services along with corresponding
|
||||
setter functions.
|
||||
|
||||
- Added `sample_rate` as a constructor parameter for TTS services.
|
||||
|
||||
- Pipecat has a pipeline-based architecture. The pipeline consists of frame
|
||||
processors linked to each other. The elements traveling across the pipeline
|
||||
are called frames.
|
||||
|
||||
To have a deterministic behavior the frames traveling through the pipeline
|
||||
should always be ordered, except system frames which are out-of-band
|
||||
frames. To achieve that, each frame processor should only output frames from a
|
||||
single task.
|
||||
|
||||
In this version we introduce synchronous and asynchronous frame
|
||||
processors. The synchronous processors push output frames from the same task
|
||||
that they receive input frames, and therefore only pushing frames from one
|
||||
task. Asynchronous frame processors can have internal tasks to perform things
|
||||
asynchronously (e.g. receiving data from a websocket) but they also have a
|
||||
single task where they push frames from.
|
||||
|
||||
By default, frame processors are synchronous. To change a frame processor to
|
||||
asynchronous you only need to pass `sync=False` to the base class constructor.
|
||||
|
||||
- Added pipeline clocks. A pipeline clock is used by the output transport to
|
||||
know when a frame needs to be presented. For that, all frames now have an
|
||||
optional `pts` field (prensentation timestamp). There's currently just one
|
||||
clock implementation `SystemClock` and the `pts` field is currently only used
|
||||
for `TextFrame`s (audio and image frames will be next).
|
||||
|
||||
- A clock can now be specified to `PipelineTask` (defaults to
|
||||
`SystemClock`). This clock will be passed to each frame processor via the
|
||||
`StartFrame`.
|
||||
|
||||
- Added `CartesiaHttpTTSService`. This is a synchronous frame processor
|
||||
(i.e. given an input text frame it will wait for the whole output before
|
||||
returning).
|
||||
|
||||
- `DailyTransport` now supports setting the audio bitrate to improve audio
|
||||
quality through the `DailyParams.audio_out_bitrate` parameter. The new
|
||||
default is 96kbps.
|
||||
|
||||
- `DailyTransport` now uses the number of audio output channels (1 or 2) to set
|
||||
mono or stereo audio when needed.
|
||||
|
||||
- Interruptions support has been added to `TwilioFrameSerializer` when using
|
||||
`FastAPIWebsocketTransport`.
|
||||
|
||||
- Added new `LmntTTSService` text-to-speech service.
|
||||
(see https://www.lmnt.com/)
|
||||
|
||||
- Added `TTSModelUpdateFrame`, `TTSLanguageUpdateFrame`, `STTModelUpdateFrame`,
|
||||
and `STTLanguageUpdateFrame` frames to allow you to switch models, language
|
||||
and voices in TTS and STT services.
|
||||
|
||||
- Added new `transcriptions.Language` enum.
|
||||
|
||||
### Changed
|
||||
|
||||
- We now distinguish between input and output audio and image frames. We
|
||||
introduce `InputAudioRawFrame`, `OutputAudioRawFrame`, `InputImageRawFrame`
|
||||
and `OutputImageRawFrame` (and other subclasses of those). The input frames
|
||||
usually come from an input transport and are meant to be processed inside the
|
||||
pipeline to generate new frames. However, the input frames will not be sent
|
||||
through an output transport. The output frames can also be processed by any
|
||||
frame processor in the pipeline and they are allowed to be sent by the output
|
||||
transport.
|
||||
|
||||
- `ParallelTask` has been renamed to `SyncParallelPipeline`. A
|
||||
`SyncParallelPipeline` is a frame processor that contains a list of different
|
||||
pipelines to be executed concurrently. The difference between a
|
||||
`SyncParallelPipeline` and a `ParallelPipeline` is that, given an input frame,
|
||||
the `SyncParallelPipeline` will wait for all the internal pipelines to
|
||||
complete. This is achieved by ensuring all the processors in each of the
|
||||
internal pipelines are synchronous.
|
||||
|
||||
- `StartFrame` is back a system frame so we make sure it's processed immediately
|
||||
by all processors. `EndFrame` stays a control frame since it needs to be
|
||||
ordered allowing the frames in the pipeline to be processed.
|
||||
|
||||
- Updated `MoondreamService` revision to `2024-08-26`.
|
||||
|
||||
- `CartesiaTTSService` and `ElevenLabsTTSService` now add presentation
|
||||
timestamps to their text output. This allows the output transport to push the
|
||||
text frames downstream at almost the same time the words are spoken. We say
|
||||
"almost" because currently the audio frames don't have presentation timestamp
|
||||
but they should be played at roughly the same time.
|
||||
|
||||
- `DailyTransport.on_joined` event now returns the full session data instead of
|
||||
just the participant.
|
||||
|
||||
- `CartesiaTTSService` is now a subclass of `TTSService`.
|
||||
|
||||
- `DeepgramSTTService` is now a subclass of `STTService`.
|
||||
|
||||
- `WhisperSTTService` is now a subclass of `SegmentedSTTService`. A
|
||||
`SegmentedSTTService` is a `STTService` where the provided audio is given in a
|
||||
big chunk (i.e. from when the user starts speaking until the user stops
|
||||
speaking) instead of a continous stream.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed a `BaseOutputTransport` issue that would stop audio and video rendering
|
||||
tasks (after receiving and `EndFrame`) before the internal queue was emptied,
|
||||
causing the pipeline to finish prematurely.
|
||||
|
||||
- `StartFrame` should be the first frame every processor receives to avoid
|
||||
situations where things are not initialized (because initialization happens on
|
||||
`StartFrame`) and other frames come in resulting in undesired behavior.
|
||||
|
||||
### Performance
|
||||
|
||||
- `obj_id()` and `obj_count()` now use `itertools.count` avoiding the need of
|
||||
`threading.Lock`.
|
||||
|
||||
## [0.0.41] - 2024-08-22
|
||||
|
||||
### Added
|
||||
|
||||
- Added `LivekitFrameSerializer` audio frame serializer.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fix `FastAPIWebsocketOutputTransport` variable name clash with subclass.
|
||||
|
||||
- Fix an `AnthropicLLMService` issue with empty arguments in function calling.
|
||||
|
||||
### Other
|
||||
|
||||
- Fixed `studypal` example errors.
|
||||
|
||||
## [0.0.40] - 2024-08-20
|
||||
|
||||
### Added
|
||||
|
||||
- VAD parameters can now be dynamicallt updated using the
|
||||
`VADParamsUpdateFrame`.
|
||||
|
||||
- `ErrorFrame` has now a `fatal` field to indicate the bot should exit if a
|
||||
fatal error is pushed upstream (false by default). A new `FatalErrorFrame`
|
||||
that sets this flag to true has been added.
|
||||
|
||||
- `AnthropicLLMService` now supports function calling and initial support for
|
||||
prompt caching.
|
||||
(see https://www.anthropic.com/news/prompt-caching)
|
||||
|
||||
- `ElevenLabsTTSService` can now specify ElevenLabs input parameters such as
|
||||
`output_format`.
|
||||
|
||||
- `TwilioFrameSerializer` can now specify Twilio's and Pipecat's desired sample
|
||||
rates to use.
|
||||
|
||||
- Added new `on_participant_updated` event to `DailyTransport`.
|
||||
|
||||
- Added `DailyRESTHelper.delete_room_by_name()` and
|
||||
`DailyRESTHelper.delete_room_by_url()`.
|
||||
|
||||
- Added LLM and TTS usage metrics. Those are enabled when
|
||||
`PipelineParams.enable_usage_metrics` is True.
|
||||
|
||||
- `AudioRawFrame`s are now pushed downstream from the base output
|
||||
transport. This allows capturing the exact words the bot says by adding an STT
|
||||
service at the end of the pipeline.
|
||||
|
||||
- Added new `GStreamerPipelineSource`. This processor can generate image or
|
||||
audio frames from a GStreamer pipeline (e.g. reading an MP4 file, and RTP
|
||||
stream or anything supported by GStreamer).
|
||||
|
||||
- Added `TransportParams.audio_out_is_live`. This flag is False by default and
|
||||
it is useful to indicate we should not synchronize audio with sporadic images.
|
||||
|
||||
- Added new `BotStartedSpeakingFrame` and `BotStoppedSpeakingFrame` control
|
||||
frames. These frames are pushed upstream and they should wrap
|
||||
`BotSpeakingFrame`.
|
||||
|
||||
- Transports now allow you to register event handlers without decorators.
|
||||
|
||||
### Changed
|
||||
|
||||
- Support RTVI message protocol 0.1. This includes new messages, support for
|
||||
messages responses, support for actions, configuration, webhooks and a bunch
|
||||
of new cool stuff.
|
||||
(see https://docs.rtvi.ai/)
|
||||
|
||||
- `SileroVAD` dependency is now imported via pip's `silero-vad` package.
|
||||
|
||||
- `ElevenLabsTTSService` now uses `eleven_turbo_v2_5` model by default.
|
||||
|
||||
- `BotSpeakingFrame` is now a control frame.
|
||||
|
||||
- `StartFrame` is now a control frame similar to `EndFrame`.
|
||||
|
||||
- `DeepgramTTSService` now is more customizable. You can adjust the encoding and
|
||||
sample rate.
|
||||
|
||||
### Fixed
|
||||
|
||||
- `TTSStartFrame` and `TTSStopFrame` are now sent when TTS really starts and
|
||||
stops. This allows for knowing when the bot starts and stops speaking even
|
||||
with asynchronous services (like Cartesia).
|
||||
|
||||
- Fixed `AzureSTTService` transcription frame timestamps.
|
||||
|
||||
- Fixed an issue with `DailyRESTHelper.create_room()` expirations which would
|
||||
cause this function to stop working after the initial expiration elapsed.
|
||||
|
||||
- Improved `EndFrame` and `CancelFrame` handling. `EndFrame` should end things
|
||||
gracefully while a `CancelFrame` should cancel all running tasks as soon as
|
||||
possible.
|
||||
|
||||
- Fixed an issue in `AIService` that would cause a yielded `None` value to be
|
||||
processed.
|
||||
|
||||
- RTVI's `bot-ready` message is now sent when the RTVI pipeline is ready and
|
||||
a first participant joins.
|
||||
|
||||
- Fixed a `BaseInputTransport` issue that was causing incoming system frames to
|
||||
be queued instead of being pushed immediately.
|
||||
|
||||
- Fixed a `BaseInputTransport` issue that was causing start/stop interruptions
|
||||
incoming frames to not cancel tasks and be processed properly.
|
||||
|
||||
### Other
|
||||
|
||||
- Added `studypal` example (from to the Cartesia folks!).
|
||||
|
||||
- Most examples now use Cartesia.
|
||||
|
||||
- Added examples `foundational/19a-tools-anthropic.py`,
|
||||
`foundational/19b-tools-video-anthropic.py` and
|
||||
`foundational/19a-tools-togetherai.py`.
|
||||
|
||||
- Added examples `foundational/18-gstreamer-filesrc.py` and
|
||||
`foundational/18a-gstreamer-videotestsrc.py` that show how to use
|
||||
`GStreamerPipelineSource`
|
||||
|
||||
- Remove `requests` library usage.
|
||||
|
||||
- Cleanup examples and use `DailyRESTHelper`.
|
||||
|
||||
## [0.0.39] - 2024-07-23
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed a regression introduced in 0.0.38 that would cause Daily transcription
|
||||
to stop the Pipeline.
|
||||
|
||||
## [0.0.38] - 2024-07-23
|
||||
|
||||
### Added
|
||||
|
||||
- Added `force_reload`, `skip_validation` and `trust_repo` to `SileroVAD` and
|
||||
`SileroVADAnalyzer`. This allows caching and various GitHub repo validations.
|
||||
|
||||
- Added `send_initial_empty_metrics` flag to `PipelineParams` to request for
|
||||
initial empty metrics (zero values). True by default.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed initial metrics format. It was using the wrong keys name/time instead of
|
||||
processor/value.
|
||||
|
||||
- STT services should be using ISO 8601 time format for transcription frames.
|
||||
|
||||
- Fixed an issue that would cause Daily transport to show a stop transcription
|
||||
error when actually none occurred.
|
||||
|
||||
## [0.0.37] - 2024-07-22
|
||||
|
||||
### Added
|
||||
|
||||
- Added `RTVIProcessor` which implements the RTVI-AI standard.
|
||||
See https://github.com/rtvi-ai
|
||||
|
||||
- Added `BotInterruptionFrame` which allows interrupting the bot while talking.
|
||||
|
||||
- Added `LLMMessagesAppendFrame` which allows appending messages to the current
|
||||
LLM context.
|
||||
|
||||
- Added `LLMMessagesUpdateFrame` which allows changing the LLM context for the
|
||||
one provided in this new frame.
|
||||
|
||||
- Added `LLMModelUpdateFrame` which allows updating the LLM model.
|
||||
|
||||
- Added `TTSSpeakFrame` which causes the bot say some text. This text will not
|
||||
be part of the LLM context.
|
||||
|
||||
- Added `TTSVoiceUpdateFrame` which allows updating the TTS voice.
|
||||
|
||||
### Removed
|
||||
|
||||
- We remove the `LLMResponseStartFrame` and `LLMResponseEndFrame` frames. These
|
||||
were added in the past to properly handle interruptions for the
|
||||
`LLMAssistantContextAggregator`. But the `LLMContextAggregator` is now based
|
||||
on `LLMResponseAggregator` which handles interruptions properly by just
|
||||
processing the `StartInterruptionFrame`, so there's no need for these extra
|
||||
frames any more.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue with `StatelessTextTransformer` where it was pushing a string
|
||||
instead of a `TextFrame`.
|
||||
|
||||
- `TTSService` end of sentence detection has been improved. It now works with
|
||||
acronyms, numbers, hours and others.
|
||||
|
||||
- Fixed an issue in `TTSService` that would not properly flush the current
|
||||
aggregated sentence if an `LLMFullResponseEndFrame` was found.
|
||||
|
||||
### Performance
|
||||
|
||||
- `CartesiaTTSService` now uses websockets which improves speed. It also
|
||||
leverages the new Cartesia contexts which maintains generated audio prosody
|
||||
when multiple inputs are sent, therefore improving audio quality a lot.
|
||||
|
||||
## [0.0.36] - 2024-07-02
|
||||
|
||||
### Added
|
||||
|
||||
- Added `GladiaSTTService`.
|
||||
See https://docs.gladia.io/chapters/speech-to-text-api/pages/live-speech-recognition
|
||||
|
||||
- Added `XTTSService`. This is a local Text-To-Speech service.
|
||||
See https://github.com/coqui-ai/TTS
|
||||
|
||||
- Added `UserIdleProcessor`. This processor can be used to wait for any
|
||||
interaction with the user. If the user doesn't say anything within a given
|
||||
timeout a provided callback is called.
|
||||
|
||||
- Added `IdleFrameProcessor`. This processor can be used to wait for frames
|
||||
within a given timeout. If no frame is received within the timeout a provided
|
||||
callback is called.
|
||||
|
||||
- Added new frame `BotSpeakingFrame`. This frame will be continuously pushed
|
||||
upstream while the bot is talking.
|
||||
|
||||
- It is now possible to specify a Silero VAD version when using `SileroVADAnalyzer`
|
||||
or `SileroVAD`.
|
||||
|
||||
- Added `AysncFrameProcessor` and `AsyncAIService`. Some services like
|
||||
`DeepgramSTTService` need to process things asynchronously. For example, audio
|
||||
is sent to Deepgram but transcriptions are not returned immediately. In these
|
||||
cases we still require all frames (except system frames) to be pushed
|
||||
downstream from a single task. That's what `AsyncFrameProcessor` is for. It
|
||||
creates a task and all frames should be pushed from that task. So, whenever a
|
||||
new Deepgram transcription is ready that transcription will also be pushed
|
||||
from this internal task.
|
||||
|
||||
- The `MetricsFrame` now includes processing metrics if metrics are enabled. The
|
||||
processing metrics indicate the time a processor needs to generate all its
|
||||
output. Note that not all processors generate these kind of metrics.
|
||||
|
||||
### Changed
|
||||
|
||||
- `WhisperSTTService` model can now also be a string.
|
||||
|
||||
- Added missing \* keyword separators in services.
|
||||
|
||||
### Fixed
|
||||
|
||||
- `WebsocketServerTransport` doesn't try to send frames anymore if serializers
|
||||
returns `None`.
|
||||
|
||||
- Fixed an issue where exceptions that occurred inside frame processors were
|
||||
being swallowed and not displayed.
|
||||
|
||||
- Fixed an issue in `FastAPIWebsocketTransport` where it would still try to send
|
||||
data to the websocket after being closed.
|
||||
|
||||
### Other
|
||||
|
||||
- Added Fly.io deployment example in `examples/deployment/flyio-example`.
|
||||
|
||||
- Added new `17-detect-user-idle.py` example that shows how to use the new
|
||||
`UserIdleProcessor`.
|
||||
|
||||
## [0.0.35] - 2024-06-28
|
||||
|
||||
### Changed
|
||||
|
||||
- `FastAPIWebsocketParams` now require a serializer.
|
||||
|
||||
- `TwilioFrameSerializer` now requires a `streamSid`.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Silero VAD number of frames needs to be 512 for 16000 sample rate or 256 for
|
||||
8000 sample rate.
|
||||
|
||||
## [0.0.34] - 2024-06-25
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue with asynchronous STT services (Deepgram and Azure) that could
|
||||
interruptions to ignore transcriptions.
|
||||
|
||||
- Fixed an issue introduced in 0.0.33 that would cause the LLM to generate
|
||||
shorter output.
|
||||
|
||||
## [0.0.33] - 2024-06-25
|
||||
|
||||
### Changed
|
||||
|
||||
- Upgraded to Cartesia's new Python library 1.0.0. `CartesiaTTSService` now
|
||||
expects a voice ID instead of a voice name (you can get the voice ID from
|
||||
Cartesia's playground). You can also specify the audio `sample_rate` and
|
||||
`encoding` instead of the previous `output_format`.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue with asynchronous STT services (Deepgram and Azure) that could
|
||||
cause static audio issues and interruptions to not work properly when dealing
|
||||
with multiple LLMs sentences.
|
||||
|
||||
- Fixed an issue that could mix new LLM responses with previous ones when
|
||||
handling interruptions.
|
||||
|
||||
- Fixed a Daily transport blocking situation that occurred while reading audio
|
||||
frames after a participant left the room. Needs daily-python >= 0.10.1.
|
||||
|
||||
## [0.0.32] - 2024-06-22
|
||||
|
||||
### Added
|
||||
|
||||
- Allow specifying a `DeepgramSTTService` url which allows using on-prem
|
||||
Deepgram.
|
||||
|
||||
- Added new `FastAPIWebsocketTransport`. This is a new websocket transport that
|
||||
can be integrated with FastAPI websockets.
|
||||
|
||||
- Added new `TwilioFrameSerializer`. This is a new serializer that knows how to
|
||||
serialize and deserialize audio frames from Twilio.
|
||||
|
||||
- Added Daily transport event: `on_dialout_answered`. See
|
||||
https://reference-python.daily.co/api_reference.html#daily.EventHandler
|
||||
|
||||
- Added new `AzureSTTService`. This allows you to use Azure Speech-To-Text.
|
||||
|
||||
### Performance
|
||||
|
||||
- Convert `BaseOutputTransport` and `BaseOutputTransport` to fully use asyncio
|
||||
and remove the use of threads.
|
||||
|
||||
### Other
|
||||
|
||||
- Added `twilio-chatbot`. This is an example that shows how to integrate Twilio
|
||||
phone numbers with a Pipecat bot.
|
||||
|
||||
- Updated `07f-interruptible-azure.py` to use `AzureLLMService`,
|
||||
`AzureSTTService` and `AzureTTSService`.
|
||||
|
||||
## [0.0.31] - 2024-06-13
|
||||
|
||||
### Performance
|
||||
@@ -227,7 +691,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- Added Daily transport support for dial-in use cases.
|
||||
|
||||
- Added Daily transport events: `on_dialout_connected`, `on_dialout_stopped`,
|
||||
`on_dialout_error` and `on_dialout_warning`. See
|
||||
`on_dialout_error` and `on_dialout_warning`. See
|
||||
https://reference-python.daily.co/api_reference.html#daily.EventHandler
|
||||
|
||||
## [0.0.21] - 2024-05-22
|
||||
|
||||
62
README.md
62
README.md
@@ -4,8 +4,7 @@
|
||||
|
||||
# Pipecat
|
||||
|
||||
[](https://pypi.org/project/pipecat-ai) [](https://discord.gg/pipecat)
|
||||
[](https://pypi.org/project/pipecat-ai) [](https://discord.gg/pipecat) <a href="https://app.commanddash.io/agent/github_pipecat-ai_pipecat"><img src="https://img.shields.io/badge/AI-Code%20Agent-EB9FDA"></a>
|
||||
|
||||
`pipecat` is a framework for building voice (and multimodal) conversational agents. Things like personal coaches, meeting assistants, [story-telling toys for kids](https://storytelling-chatbot.fly.dev/), customer support bots, [intake flows](https://www.youtube.com/watch?v=lDevgsp9vn0), and snarky social companions.
|
||||
|
||||
@@ -39,7 +38,7 @@ pip install "pipecat-ai[option,...]"
|
||||
|
||||
Your project may or may not need these, so they're made available as optional requirements. Here is a list:
|
||||
|
||||
- **AI services**: `anthropic`, `azure`, `deepgram`, `google`, `fal`, `moondream`, `openai`, `playht`, `silero`, `whisper`
|
||||
- **AI services**: `anthropic`, `azure`, `deepgram`, `gladia`, `google`, `fal`, `lmnt`, `moondream`, `openai`, `openpipe`, `playht`, `silero`, `whisper`, `xtts`
|
||||
- **Transports**: `local`, `websocket`, `daily`
|
||||
|
||||
## Code examples
|
||||
@@ -49,7 +48,7 @@ Your project may or may not need these, so they're made available as optional re
|
||||
|
||||
## A simple voice agent running locally
|
||||
|
||||
Here is a very basic Pipecat bot that greets a user when they join a real-time session. We'll use [Daily](https://daily.co) for real-time media transport, and [ElevenLabs](https://elevenlabs.io/) for text-to-speech.
|
||||
Here is a very basic Pipecat bot that greets a user when they join a real-time session. We'll use [Daily](https://daily.co) for real-time media transport, and [Cartesia](https://cartesia.ai/) for text-to-speech.
|
||||
|
||||
```python
|
||||
#app.py
|
||||
@@ -61,7 +60,7 @@ from pipecat.frames.frames import EndFrame, TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
async def main():
|
||||
@@ -70,14 +69,13 @@ async def main():
|
||||
transport = DailyTransport(
|
||||
room_url=...,
|
||||
token=...,
|
||||
"Bot Name",
|
||||
DailyParams(audio_out_enabled=True))
|
||||
bot_name="Bot Name",
|
||||
params=DailyParams(audio_out_enabled=True))
|
||||
|
||||
# Use Eleven Labs for Text-to-Speech
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=...,
|
||||
voice_id=...,
|
||||
# Use Cartesia for Text-to-Speech
|
||||
tts = CartesiaTTSService(
|
||||
api_key=...,
|
||||
voice_id=...
|
||||
)
|
||||
|
||||
# Simple pipeline that will process text to speech and output the result
|
||||
@@ -94,7 +92,7 @@ async def main():
|
||||
@transport.event_handler("on_participant_joined")
|
||||
async def on_new_participant_joined(transport, participant):
|
||||
participant_name = participant["info"]["userName"] or ''
|
||||
# Queue a TextFrame that will get spoken by the TTS service (Eleven Labs)
|
||||
# Queue a TextFrame that will get spoken by the TTS service (Cartesia)
|
||||
await task.queue_frames([TextFrame(f"Hello there, {participant_name}!"), EndFrame()])
|
||||
|
||||
# Run the pipeline task
|
||||
@@ -125,7 +123,7 @@ Sign up [here](https://dashboard.daily.co/u/signup) and [create a room](https://
|
||||
|
||||
Voice Activity Detection — very important for knowing when a user has finished speaking to your bot. If you are not using press-to-talk, and want Pipecat to detect when the user has finished talking, VAD is an essential component for a natural feeling conversation.
|
||||
|
||||
Pipecast makes use of WebRTC VAD by default when using a WebRTC transport layer. Optionally, you can use Silero VAD for improved accuracy at the cost of higher CPU usage.
|
||||
Pipecat makes use of WebRTC VAD by default when using a WebRTC transport layer. Optionally, you can use Silero VAD for improved accuracy at the cost of higher CPU usage.
|
||||
|
||||
```shell
|
||||
pip install pipecat-ai[silero]
|
||||
@@ -146,20 +144,20 @@ source venv/bin/activate
|
||||
From the root of this repo, run the following:
|
||||
|
||||
```shell
|
||||
pip install -r dev-requirements.txt -r {env}-requirements.txt
|
||||
pip install -r dev-requirements.txt
|
||||
python -m build
|
||||
```
|
||||
|
||||
This builds the package. To use the package locally (eg to run sample files), run
|
||||
This builds the package. To use the package locally (e.g. to run sample files), run
|
||||
|
||||
```shell
|
||||
pip install --editable .
|
||||
pip install --editable ".[option,...]"
|
||||
```
|
||||
|
||||
If you want to use this package from another directory, you can run:
|
||||
|
||||
```shell
|
||||
pip install path_to_this_repo
|
||||
pip install "path_to_this_repo[option,...]"
|
||||
```
|
||||
|
||||
### Running tests
|
||||
@@ -167,27 +165,29 @@ pip install path_to_this_repo
|
||||
From the root directory, run:
|
||||
|
||||
```shell
|
||||
pytest --doctest-modules --ignore-glob="*to_be_updated*" src tests
|
||||
pytest --doctest-modules --ignore-glob="*to_be_updated*" --ignore-glob=*pipeline_source* src tests
|
||||
```
|
||||
|
||||
## Setting up your editor
|
||||
|
||||
This project uses strict [PEP 8](https://peps.python.org/pep-0008/) formatting.
|
||||
This project uses strict [PEP 8](https://peps.python.org/pep-0008/) formatting via [Ruff](https://github.com/astral-sh/ruff).
|
||||
|
||||
### Emacs
|
||||
|
||||
You can use [use-package](https://github.com/jwiegley/use-package) to install [py-autopep8](https://codeberg.org/ideasman42/emacs-py-autopep8) package and configure `autopep8` arguments:
|
||||
You can use [use-package](https://github.com/jwiegley/use-package) to install [emacs-lazy-ruff](https://github.com/christophermadsen/emacs-lazy-ruff) package and configure `ruff` arguments:
|
||||
|
||||
```elisp
|
||||
(use-package py-autopep8
|
||||
(use-package lazy-ruff
|
||||
:ensure t
|
||||
:defer t
|
||||
:hook ((python-mode . py-autopep8-mode))
|
||||
:hook ((python-mode . lazy-ruff-mode))
|
||||
:config
|
||||
(setq py-autopep8-options '("-a" "-a", "--max-line-length=100")))
|
||||
(setq lazy-ruff-format-command "ruff format --config line-length=100")
|
||||
(setq lazy-ruff-only-format-block t)
|
||||
(setq lazy-ruff-only-format-region t)
|
||||
(setq lazy-ruff-only-format-buffer t))
|
||||
```
|
||||
|
||||
`autopep8` was installed in the `venv` environment described before, so you should be able to use [pyvenv-auto](https://github.com/ryotaro612/pyvenv-auto) to automatically load that environment inside Emacs.
|
||||
`ruff` was installed in the `venv` environment described before, so you should be able to use [pyvenv-auto](https://github.com/ryotaro612/pyvenv-auto) to automatically load that environment inside Emacs.
|
||||
|
||||
```elisp
|
||||
(use-package pyvenv-auto
|
||||
@@ -200,18 +200,14 @@ You can use [use-package](https://github.com/jwiegley/use-package) to install [p
|
||||
### Visual Studio Code
|
||||
|
||||
Install the
|
||||
[autopep8](https://marketplace.visualstudio.com/items?itemName=ms-python.autopep8) extension. Then edit the user settings (_Ctrl-Shift-P_ `Open User Settings (JSON)`) and set it as the default Python formatter, enable formatting on save and configure `autopep8` arguments:
|
||||
[Ruff](https://marketplace.visualstudio.com/items?itemName=charliermarsh.ruff) extension. Then edit the user settings (_Ctrl-Shift-P_ `Open User Settings (JSON)`) and set it as the default Python formatter, enable formatting on save and configure `ruff` arguments:
|
||||
|
||||
```json
|
||||
"[python]": {
|
||||
"editor.defaultFormatter": "ms-python.autopep8",
|
||||
"editor.defaultFormatter": "charliermarsh.ruff",
|
||||
"editor.formatOnSave": true
|
||||
},
|
||||
"autopep8.args": [
|
||||
"-a",
|
||||
"-a",
|
||||
"--max-line-length=100"
|
||||
],
|
||||
"ruff.format.args": ["--config", "line-length=100"]
|
||||
```
|
||||
|
||||
## Getting help
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
autopep8~=2.1.0
|
||||
build~=1.2.1
|
||||
grpcio-tools~=1.62.2
|
||||
pip-tools~=7.4.1
|
||||
pytest~=8.2.0
|
||||
setuptools~=69.5.1
|
||||
pyright~=1.1.376
|
||||
pytest~=8.3.2
|
||||
ruff~=0.6.7
|
||||
setuptools~=72.2.0
|
||||
setuptools_scm~=8.1.0
|
||||
|
||||
@@ -27,6 +27,13 @@ FAL_KEY=...
|
||||
# Fireworks
|
||||
FIREWORKS_API_KEY=...
|
||||
|
||||
# Gladia
|
||||
GLADIA_API_KEY=...
|
||||
|
||||
# LMNT
|
||||
LMNT_API_KEY=...
|
||||
LMNT_VOICE_ID=...
|
||||
|
||||
# PlayHT
|
||||
PLAY_HT_USER_ID=...
|
||||
PLAY_HT_API_KEY=...
|
||||
|
||||
@@ -32,14 +32,16 @@ Next, follow the steps in the README for each demo.
|
||||
|
||||
## Projects:
|
||||
|
||||
| Project | Description | Services |
|
||||
| -------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------- |
|
||||
| [Simple Chatbot](simple-chatbot) | Basic voice-driven conversational bot. A good starting point for learning the flow of the framework. | Deepgram, OpenAI, Daily, Daily Prebuilt UI |
|
||||
| [Storytelling Chatbot](storytelling-chatbot) | Stitches together multiple third-party services to create a collaborative storytime experience. | Deepgram, ElevenLabs, Open AI, Fal, Daily, Custom UI |
|
||||
| [Translation Chatbot](translation-chatbot) | Listens for user speech, then translates that speech to Spanish and speaks the translation back. Demonstrates multi-participant use-cases. | Deepgram, Azure, OpenAI, Daily, Daily Prebuilt UI |
|
||||
| [Moondream Chatbot](moondream-chatbot) | Demonstrates how to add vision capabilities to GPT4. **Note: works best with a GPU** | Deepgram, OpenAI, Moondream, Daily, Daily Prebuilt UI |
|
||||
| Function-calling Chatbot (TBC) | A chatbot that can call functions in response to user input. | Deepgram, OpenAI, Fireworks, Daily, Daily Prebuilt UI |
|
||||
| [Dialin Chatbot](dialin-chatbot) | A chatbot that connects to an incoming phone call from Daily or Twilio. | Deepgram, OpenAI, ElevenLabs, Daily, Twilio |
|
||||
| Project | Description | Services |
|
||||
|----------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------|
|
||||
| [Simple Chatbot](simple-chatbot) | Basic voice-driven conversational bot. A good starting point for learning the flow of the framework. | Deepgram, ElevenLabs, OpenAI, Daily, Daily Prebuilt UI |
|
||||
| [Storytelling Chatbot](storytelling-chatbot) | Stitches together multiple third-party services to create a collaborative storytime experience. | Deepgram, ElevenLabs, OpenAI, Fal, Daily, Custom UI |
|
||||
| [Translation Chatbot](translation-chatbot) | Listens for user speech, then translates that speech to Spanish and speaks the translation back. Demonstrates multi-participant use-cases. | Deepgram, Azure, OpenAI, Daily, Daily Prebuilt UI |
|
||||
| [Moondream Chatbot](moondream-chatbot) | Demonstrates how to add vision capabilities to GPT4. **Note: works best with a GPU** | Deepgram, ElevenLabs, OpenAI, Moondream, Daily, Daily Prebuilt UI |
|
||||
| [Patient intake](patient-intake) | A chatbot that can call functions in response to user input. | Deepgram, ElevenLabs, OpenAI, Daily, Daily Prebuilt UI |
|
||||
| [Dialin Chatbot](dialin-chatbot) | A chatbot that connects to an incoming phone call from Daily or Twilio. | Deepgram, ElevenLabs, OpenAI, Daily, Twilio |
|
||||
| [Twilio Chatbot](twilio-chatbot) | A chatbot that connects to an incoming phone call from Twilio. | Deepgram, ElevenLabs, OpenAI, Daily, Twilio |
|
||||
| [studypal](studypal) | A chatbot to have a conversation about any article on the web | |
|
||||
|
||||
> [!IMPORTANT]
|
||||
> These example projects use Daily as a WebRTC transport and can be joined using their hosted Prebuilt UI.
|
||||
|
||||
13
examples/deployment/flyio-example/Dockerfile
Normal file
13
examples/deployment/flyio-example/Dockerfile
Normal file
@@ -0,0 +1,13 @@
|
||||
FROM python:3.11-bullseye
|
||||
|
||||
# Open port 7860 for http service
|
||||
ENV FAST_API_PORT=7860
|
||||
EXPOSE 7860
|
||||
|
||||
# Install Python dependencies
|
||||
COPY *.py .
|
||||
COPY ./requirements.txt requirements.txt
|
||||
RUN pip3 install --no-cache-dir --upgrade -r requirements.txt
|
||||
|
||||
# Start the FastAPI server
|
||||
CMD python3 bot_runner.py --port ${FAST_API_PORT}
|
||||
39
examples/deployment/flyio-example/README.md
Normal file
39
examples/deployment/flyio-example/README.md
Normal file
@@ -0,0 +1,39 @@
|
||||
# Fly.io deployment example
|
||||
|
||||
This project modifies the `bot_runner.py` server to launch a new machine for each user session. This is a recommended approach for production vs. running shell processess as your deployment will quickly run out of system resources under load.
|
||||
|
||||
For this example, we are using Daily as a WebRTC transport and provisioning a new room and token for each session. You can use another transport, such as WebSockets, by modifying the `bot.py` and `bot_runner.py` files accordingly.
|
||||
|
||||
## Setting up your fly.io deployment
|
||||
|
||||
### Create your fly.toml file
|
||||
|
||||
You can copy the `example-fly.toml` as a reference. Be sure to change the app name to something unique.
|
||||
|
||||
### Create your .env file
|
||||
|
||||
Copy the base `env.example` to `.env` and enter the necessary API keys.
|
||||
|
||||
`FLY_APP_NAME` should match that in the `fly.toml` file.
|
||||
|
||||
### Launch a new fly.io project
|
||||
|
||||
`fly launch` or `fly launch --org your-org-name`
|
||||
|
||||
### Set the necessary app secrets from your .env
|
||||
|
||||
Note: you can do this manually via the fly.io dashboard under the "secrets" sub-section of your deployment (e.g. "https://fly.io/apps/fly-app-name/secrets") or run the following terminal command:
|
||||
|
||||
`cat .env | tr '\n' ' ' | xargs flyctl secrets set`
|
||||
|
||||
### Deploy your machine
|
||||
|
||||
`fly deploy`
|
||||
|
||||
## Connecting to your bot
|
||||
|
||||
Send a post request to your running fly.io instance:
|
||||
|
||||
`curl --location --request POST 'https://YOUR_FLY_APP_NAME/start_bot'`
|
||||
|
||||
This request will wait until the machine enters into a `starting` state, before returning the a room URL and token to join.
|
||||
0
examples/deployment/flyio-example/__init__.py
Normal file
0
examples/deployment/flyio-example/__init__.py
Normal file
104
examples/deployment/flyio-example/bot.py
Normal file
104
examples/deployment/flyio-example/bot.py
Normal file
@@ -0,0 +1,104 @@
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.frames.frames import LLMMessagesFrame, EndFrame
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
daily_api_key = os.getenv("DAILY_API_KEY", "")
|
||||
daily_api_url = os.getenv("DAILY_API_URL", "https://api.daily.co/v1")
|
||||
|
||||
|
||||
async def main(room_url: str, token: str):
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
DailyParams(
|
||||
api_url=daily_api_url,
|
||||
api_key=daily_api_key,
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are Chatbot, a friendly, helpful robot. Your output will be converted to audio so don't include special characters other than '!' or '?' in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by saying hello.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
tma_in,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
@transport.event_handler("on_call_state_updated")
|
||||
async def on_call_state_updated(transport, state):
|
||||
if state == "left":
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Pipecat Bot")
|
||||
parser.add_argument("-u", type=str, help="Room URL")
|
||||
parser.add_argument("-t", type=str, help="Token")
|
||||
config = parser.parse_args()
|
||||
|
||||
asyncio.run(main(config.u, config.t))
|
||||
211
examples/deployment/flyio-example/bot_runner.py
Normal file
211
examples/deployment/flyio-example/bot_runner.py
Normal file
@@ -0,0 +1,211 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import subprocess
|
||||
import os
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import (
|
||||
DailyRESTHelper,
|
||||
DailyRoomObject,
|
||||
DailyRoomProperties,
|
||||
DailyRoomParams,
|
||||
)
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# ------------ Configuration ------------ #
|
||||
|
||||
MAX_SESSION_TIME = 5 * 60 # 5 minutes
|
||||
REQUIRED_ENV_VARS = [
|
||||
"DAILY_API_KEY",
|
||||
"OPENAI_API_KEY",
|
||||
"ELEVENLABS_API_KEY",
|
||||
"ELEVENLABS_VOICE_ID",
|
||||
"FLY_API_KEY",
|
||||
"FLY_APP_NAME",
|
||||
]
|
||||
|
||||
FLY_API_HOST = os.getenv("FLY_API_HOST", "https://api.machines.dev/v1")
|
||||
FLY_APP_NAME = os.getenv("FLY_APP_NAME", "pipecat-fly-example")
|
||||
FLY_API_KEY = os.getenv("FLY_API_KEY", "")
|
||||
FLY_HEADERS = {"Authorization": f"Bearer {FLY_API_KEY}", "Content-Type": "application/json"}
|
||||
|
||||
daily_helpers = {}
|
||||
|
||||
|
||||
# ----------------- API ----------------- #
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
aiohttp_session = aiohttp.ClientSession()
|
||||
daily_helpers["rest"] = DailyRESTHelper(
|
||||
daily_api_key=os.getenv("DAILY_API_KEY", ""),
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
yield
|
||||
await aiohttp_session.close()
|
||||
|
||||
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# ----------------- Main ----------------- #
|
||||
|
||||
|
||||
async def spawn_fly_machine(room_url: str, token: str):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
# Use the same image as the bot runner
|
||||
async with session.get(
|
||||
f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines", headers=FLY_HEADERS
|
||||
) as r:
|
||||
if r.status != 200:
|
||||
text = await r.text()
|
||||
raise Exception(f"Unable to get machine info from Fly: {text}")
|
||||
|
||||
data = await r.json()
|
||||
image = data[0]["config"]["image"]
|
||||
|
||||
# Machine configuration
|
||||
cmd = f"python3 bot.py -u {room_url} -t {token}"
|
||||
cmd = cmd.split()
|
||||
worker_props = {
|
||||
"config": {
|
||||
"image": image,
|
||||
"auto_destroy": True,
|
||||
"init": {"cmd": cmd},
|
||||
"restart": {"policy": "no"},
|
||||
"guest": {"cpu_kind": "shared", "cpus": 1, "memory_mb": 1024},
|
||||
},
|
||||
}
|
||||
|
||||
# Spawn a new machine instance
|
||||
async with session.post(
|
||||
f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines", headers=FLY_HEADERS, json=worker_props
|
||||
) as r:
|
||||
if r.status != 200:
|
||||
text = await r.text()
|
||||
raise Exception(f"Problem starting a bot worker: {text}")
|
||||
|
||||
data = await r.json()
|
||||
# Wait for the machine to enter the started state
|
||||
vm_id = data["id"]
|
||||
|
||||
async with session.get(
|
||||
f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines/{vm_id}/wait?state=started",
|
||||
headers=FLY_HEADERS,
|
||||
) as r:
|
||||
if r.status != 200:
|
||||
text = await r.text()
|
||||
raise Exception(f"Bot was unable to enter started state: {text}")
|
||||
|
||||
print(f"Machine joined room: {room_url}")
|
||||
|
||||
|
||||
@app.post("/start_bot")
|
||||
async def start_bot(request: Request) -> JSONResponse:
|
||||
try:
|
||||
data = await request.json()
|
||||
# Is this a webhook creation request?
|
||||
if "test" in data:
|
||||
return JSONResponse({"test": True})
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
# Use specified room URL, or create a new one if not specified
|
||||
room_url = os.getenv("DAILY_SAMPLE_ROOM_URL", "")
|
||||
|
||||
if not room_url:
|
||||
params = DailyRoomParams(properties=DailyRoomProperties())
|
||||
try:
|
||||
room: DailyRoomObject = await daily_helpers["rest"].create_room(params=params)
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Unable to provision room {e}")
|
||||
else:
|
||||
# Check passed room URL exists, we should assume that it already has a sip set up
|
||||
try:
|
||||
room: DailyRoomObject = await daily_helpers["rest"].get_room_from_url(room_url)
|
||||
except Exception:
|
||||
raise HTTPException(status_code=500, detail=f"Room not found: {room_url}")
|
||||
|
||||
# Give the agent a token to join the session
|
||||
token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)
|
||||
|
||||
if not room or not token:
|
||||
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room_url}")
|
||||
|
||||
# Launch a new fly.io machine, or run as a shell process (not recommended)
|
||||
run_as_process = os.getenv("RUN_AS_PROCESS", False)
|
||||
|
||||
if run_as_process:
|
||||
try:
|
||||
subprocess.Popen(
|
||||
[f"python3 -m bot -u {room.url} -t {token}"],
|
||||
shell=True,
|
||||
bufsize=1,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__)),
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
else:
|
||||
try:
|
||||
await spawn_fly_machine(room.url, token)
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Failed to spawn VM: {e}")
|
||||
|
||||
# Grab a token for the user to join with
|
||||
user_token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)
|
||||
|
||||
return JSONResponse(
|
||||
{
|
||||
"room_url": room.url,
|
||||
"token": user_token,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Check environment variables
|
||||
for env_var in REQUIRED_ENV_VARS:
|
||||
if env_var not in os.environ:
|
||||
raise Exception(f"Missing environment variable: {env_var}.")
|
||||
|
||||
parser = argparse.ArgumentParser(description="Pipecat Bot Runner")
|
||||
parser.add_argument(
|
||||
"--host", type=str, default=os.getenv("HOST", "0.0.0.0"), help="Host address"
|
||||
)
|
||||
parser.add_argument("--port", type=int, default=os.getenv("PORT", 7860), help="Port number")
|
||||
parser.add_argument(
|
||||
"--reload", action="store_true", default=False, help="Reload code on change"
|
||||
)
|
||||
|
||||
config = parser.parse_args()
|
||||
|
||||
try:
|
||||
import uvicorn
|
||||
|
||||
uvicorn.run("bot_runner:app", host=config.host, port=config.port, reload=config.reload)
|
||||
except KeyboardInterrupt:
|
||||
print("Pipecat runner shutting down...")
|
||||
8
examples/deployment/flyio-example/env.example
Normal file
8
examples/deployment/flyio-example/env.example
Normal file
@@ -0,0 +1,8 @@
|
||||
DAILY_API_KEY=
|
||||
DAILY_SAMPLE_ROOM_URL= # Enter a Daily room URL to use a set room URL each time (useful for local testing)
|
||||
OPENAI_API_KEY=
|
||||
ELEVENLABS_API_KEY=
|
||||
ELEVENLABS_VOICE_ID=
|
||||
FLY_API_KEY=
|
||||
FLY_APP_NAME=
|
||||
RUN_AS_PROCESS= # Spawn fly.io machine for each session or run as local process
|
||||
25
examples/deployment/flyio-example/example-fly.toml
Normal file
25
examples/deployment/flyio-example/example-fly.toml
Normal file
@@ -0,0 +1,25 @@
|
||||
# fly.toml app configuration file generated for pipecat-fly-example on 2024-07-01T15:04:53+01:00
|
||||
#
|
||||
# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
|
||||
#
|
||||
|
||||
app = 'pipecat-fly-example'
|
||||
primary_region = 'sjc'
|
||||
|
||||
[build]
|
||||
|
||||
[env]
|
||||
FLY_APP_NAME = 'pipecat-fly-example'
|
||||
|
||||
[http_service]
|
||||
internal_port = 7860
|
||||
force_https = true
|
||||
auto_stop_machines = true
|
||||
auto_start_machines = true
|
||||
min_machines_running = 0
|
||||
processes = ['app']
|
||||
|
||||
[[vm]]
|
||||
memory = 512
|
||||
cpu_kind = 'shared'
|
||||
cpus = 1
|
||||
5
examples/deployment/flyio-example/requirements.txt
Normal file
5
examples/deployment/flyio-example/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
pipecat-ai[daily,openai,silero]
|
||||
fastapi
|
||||
uvicorn
|
||||
python-dotenv
|
||||
loguru
|
||||
@@ -1,5 +1,4 @@
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
@@ -7,11 +6,11 @@ import argparse
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import LLMAssistantResponseAggregator, LLMUserResponseAggregator
|
||||
from pipecat.frames.frames import (
|
||||
LLMMessagesFrame,
|
||||
EndFrame
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.frames.frames import LLMMessagesFrame, EndFrame
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport, DailyDialinSettings
|
||||
@@ -19,6 +18,7 @@ from pipecat.vad.silero import SileroVADAnalyzer
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -29,75 +29,70 @@ daily_api_url = os.getenv("DAILY_API_URL", "https://api.daily.co/v1")
|
||||
|
||||
|
||||
async def main(room_url: str, token: str, callId: str, callDomain: str):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
# diallin_settings are only needed if Daily's SIP URI is used
|
||||
# If you are handling this via Twilio, Telnyx, set this to None
|
||||
# and handle call-forwarding when on_dialin_ready fires.
|
||||
diallin_settings = DailyDialinSettings(
|
||||
call_id=callId,
|
||||
call_domain=callDomain
|
||||
)
|
||||
# diallin_settings are only needed if Daily's SIP URI is used
|
||||
# If you are handling this via Twilio, Telnyx, set this to None
|
||||
# and handle call-forwarding when on_dialin_ready fires.
|
||||
diallin_settings = DailyDialinSettings(call_id=callId, call_domain=callDomain)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
DailyParams(
|
||||
api_url=daily_api_url,
|
||||
api_key=daily_api_key,
|
||||
dialin_settings=diallin_settings,
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
)
|
||||
)
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
DailyParams(
|
||||
api_url=daily_api_url,
|
||||
api_key=daily_api_key,
|
||||
dialin_settings=diallin_settings,
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
)
|
||||
tts = ElevenLabsTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by saying 'Oh, hello! Who dares dial me at this hour?!'.",
|
||||
},
|
||||
]
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by saying 'Oh, hello! Who dares dial me at this hour?!'.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
tma_in,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
])
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -6,40 +6,62 @@ provisioning a room and starting a Pipecat bot in response.
|
||||
|
||||
Refer to README for more information.
|
||||
"""
|
||||
|
||||
import aiohttp
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomObject, DailyRoomProperties, DailyRoomSipParams, DailyRoomParams
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse, PlainTextResponse
|
||||
from twilio.twiml.voice_response import VoiceResponse
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import (
|
||||
DailyRESTHelper,
|
||||
DailyRoomObject,
|
||||
DailyRoomProperties,
|
||||
DailyRoomSipParams,
|
||||
DailyRoomParams,
|
||||
)
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# ------------ Configuration ------------ #
|
||||
|
||||
MAX_SESSION_TIME = 5 * 60 # 5 minutes
|
||||
REQUIRED_ENV_VARS = ['OPENAI_API_KEY', 'DAILY_API_KEY',
|
||||
'ELEVENLABS_API_KEY', 'ELEVENLABS_VOICE_ID']
|
||||
|
||||
daily_rest_helper = DailyRESTHelper(
|
||||
os.getenv("DAILY_API_KEY", ""),
|
||||
os.getenv("DAILY_API_URL", 'https://api.daily.co/v1'))
|
||||
REQUIRED_ENV_VARS = ["OPENAI_API_KEY", "DAILY_API_KEY", "ELEVENLABS_API_KEY", "ELEVENLABS_VOICE_ID"]
|
||||
|
||||
daily_helpers = {}
|
||||
|
||||
# ----------------- API ----------------- #
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
aiohttp_session = aiohttp.ClientSession()
|
||||
daily_helpers["rest"] = DailyRESTHelper(
|
||||
daily_api_key=os.getenv("DAILY_API_KEY", ""),
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
yield
|
||||
await aiohttp_session.close()
|
||||
|
||||
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"]
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
"""
|
||||
@@ -53,61 +75,51 @@ action using the Twilio Client library.
|
||||
"""
|
||||
|
||||
|
||||
def _create_daily_room(room_url, callId, callDomain=None, vendor="daily"):
|
||||
async def _create_daily_room(room_url, callId, callDomain=None, vendor="daily"):
|
||||
if not room_url:
|
||||
params = DailyRoomParams(
|
||||
properties=DailyRoomProperties(
|
||||
# Note: these are the default values, except for the display name
|
||||
sip=DailyRoomSipParams(
|
||||
display_name="dialin-user",
|
||||
video=False,
|
||||
sip_mode="dial-in",
|
||||
num_endpoints=1
|
||||
display_name="dialin-user", video=False, sip_mode="dial-in", num_endpoints=1
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
print(f"Creating new room...")
|
||||
room: DailyRoomObject = daily_rest_helper.create_room(params=params)
|
||||
room: DailyRoomObject = await daily_helpers["rest"].create_room(params=params)
|
||||
|
||||
else:
|
||||
# Check passed room URL exist (we assume that it already has a sip set up!)
|
||||
try:
|
||||
print(f"Joining existing room: {room_url}")
|
||||
room: DailyRoomObject = daily_rest_helper.get_room_from_url(
|
||||
room_url)
|
||||
room: DailyRoomObject = await daily_helpers["rest"].get_room_from_url(room_url)
|
||||
except Exception:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Room not found: {room_url}")
|
||||
raise HTTPException(status_code=500, detail=f"Room not found: {room_url}")
|
||||
|
||||
print(f"Daily room: {room.url} {room.config.sip_endpoint}")
|
||||
|
||||
# Give the agent a token to join the session
|
||||
token = daily_rest_helper.get_token(room.url, MAX_SESSION_TIME)
|
||||
token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)
|
||||
|
||||
if not room or not token:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to get room or token token")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to get room or token token")
|
||||
|
||||
# Spawn a new agent, and join the user session
|
||||
# Note: this is mostly for demonstration purposes (refer to 'deployment' in docs)
|
||||
if vendor == "daily":
|
||||
bot_proc = f"python3 -m bot_daily -u {room.url} -t {token} -i {
|
||||
callId} -d {callDomain}"
|
||||
bot_proc = f"python3 - m bot_daily - u {room.url} - t {token} - i {
|
||||
callId} - d {callDomain}"
|
||||
else:
|
||||
bot_proc = f"python3 -m bot_twilio -u {room.url} -t {
|
||||
token} -i {callId} -s {room.config.sip_endpoint}"
|
||||
bot_proc = f"python3 - m bot_twilio - u {room.url} - t {
|
||||
token} - i {callId} - s {room.config.sip_endpoint}"
|
||||
|
||||
try:
|
||||
subprocess.Popen(
|
||||
[bot_proc],
|
||||
shell=True,
|
||||
bufsize=1,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__))
|
||||
[bot_proc], shell=True, bufsize=1, cwd=os.path.dirname(os.path.abspath(__file__))
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
|
||||
return room
|
||||
|
||||
@@ -130,18 +142,16 @@ async def twilio_start_bot(request: Request):
|
||||
pass
|
||||
|
||||
room_url = os.getenv("DAILY_SAMPLE_ROOM_URL", None)
|
||||
callId = data.get('CallSid')
|
||||
callId = data.get("CallSid")
|
||||
|
||||
if not callId:
|
||||
raise HTTPException(
|
||||
status_code=500, detail="Missing 'CallSid' in request")
|
||||
raise HTTPException(status_code=500, detail="Missing 'CallSid' in request")
|
||||
|
||||
print("CallId: %s" % callId)
|
||||
|
||||
# create room and tell the bot to join the created room
|
||||
# note: Twilio does not require a callDomain
|
||||
room: DailyRoomObject = _create_daily_room(
|
||||
room_url, callId, None, "twilio")
|
||||
room: DailyRoomObject = await _create_daily_room(room_url, callId, None, "twilio")
|
||||
|
||||
print(f"Put Twilio on hold...")
|
||||
# We have the room and the SIP URI,
|
||||
@@ -151,7 +161,8 @@ async def twilio_start_bot(request: Request):
|
||||
# http://com.twilio.music.classical.s3.amazonaws.com/BusyStrings.mp3
|
||||
resp = VoiceResponse()
|
||||
resp.play(
|
||||
url="http://com.twilio.sounds.music.s3.amazonaws.com/MARKOVICHAMP-Borghestral.mp3", loop=10)
|
||||
url="http://com.twilio.sounds.music.s3.amazonaws.com/MARKOVICHAMP-Borghestral.mp3", loop=10
|
||||
)
|
||||
return str(resp)
|
||||
|
||||
|
||||
@@ -173,19 +184,14 @@ async def daily_start_bot(request: Request) -> JSONResponse:
|
||||
callId = data.get("callId", None)
|
||||
callDomain = data.get("callDomain", None)
|
||||
except Exception:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Missing properties 'callId' or 'callDomain'")
|
||||
raise HTTPException(status_code=500, detail="Missing properties 'callId' or 'callDomain'")
|
||||
|
||||
print(f"CallId: {callId}, CallDomain: {callDomain}")
|
||||
room: DailyRoomObject = _create_daily_room(
|
||||
room_url, callId, callDomain, "daily")
|
||||
room: DailyRoomObject = await _create_daily_room(room_url, callId, callDomain, "daily")
|
||||
|
||||
# Grab a token for the user to join with
|
||||
return JSONResponse({
|
||||
"room_url": room.url,
|
||||
"sipUri": room.config.sip_endpoint
|
||||
})
|
||||
return JSONResponse({"room_url": room.url, "sipUri": room.config.sip_endpoint})
|
||||
|
||||
|
||||
# ----------------- Main ----------------- #
|
||||
|
||||
@@ -197,24 +203,18 @@ if __name__ == "__main__":
|
||||
raise Exception(f"Missing environment variable: {env_var}.")
|
||||
|
||||
parser = argparse.ArgumentParser(description="Pipecat Bot Runner")
|
||||
parser.add_argument("--host", type=str,
|
||||
default=os.getenv("HOST", "0.0.0.0"), help="Host address")
|
||||
parser.add_argument("--port", type=int,
|
||||
default=os.getenv("PORT", 7860), help="Port number")
|
||||
parser.add_argument("--reload", action="store_true",
|
||||
default=True, help="Reload code on change")
|
||||
parser.add_argument(
|
||||
"--host", type=str, default=os.getenv("HOST", "0.0.0.0"), help="Host address"
|
||||
)
|
||||
parser.add_argument("--port", type=int, default=os.getenv("PORT", 7860), help="Port number")
|
||||
parser.add_argument("--reload", action="store_true", default=True, help="Reload code on change")
|
||||
|
||||
config = parser.parse_args()
|
||||
|
||||
try:
|
||||
import uvicorn
|
||||
|
||||
uvicorn.run(
|
||||
"bot_runner:app",
|
||||
host=config.host,
|
||||
port=config.port,
|
||||
reload=config.reload
|
||||
)
|
||||
uvicorn.run("bot_runner:app", host=config.host, port=config.port, reload=config.reload)
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print("Pipecat runner shutting down...")
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
@@ -7,11 +6,11 @@ import argparse
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import LLMAssistantResponseAggregator, LLMUserResponseAggregator
|
||||
from pipecat.frames.frames import (
|
||||
LLMMessagesFrame,
|
||||
EndFrame
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.frames.frames import LLMMessagesFrame, EndFrame
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
@@ -22,96 +21,95 @@ from twilio.rest import Client
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
twilio_account_sid = os.getenv('TWILIO_ACCOUNT_SID')
|
||||
twilio_auth_token = os.getenv('TWILIO_AUTH_TOKEN')
|
||||
twilio_account_sid = os.getenv("TWILIO_ACCOUNT_SID")
|
||||
twilio_auth_token = os.getenv("TWILIO_AUTH_TOKEN")
|
||||
twilioclient = Client(twilio_account_sid, twilio_auth_token)
|
||||
|
||||
daily_api_key = os.getenv("DAILY_API_KEY", "")
|
||||
|
||||
|
||||
async def main(room_url: str, token: str, callId: str, sipUri: str):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
# diallin_settings are only needed if Daily's SIP URI is used
|
||||
# If you are handling this via Twilio, Telnyx, set this to None
|
||||
# and handle call-forwarding when on_dialin_ready fires.
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
DailyParams(
|
||||
api_key=daily_api_key,
|
||||
dialin_settings=None, # Not required for Twilio
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
)
|
||||
)
|
||||
# dialin_settings are only needed if Daily's SIP URI is used
|
||||
# If you are handling this via Twilio, Telnyx, set this to None
|
||||
# and handle call-forwarding when on_dialin_ready fires.
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
DailyParams(
|
||||
api_key=daily_api_key,
|
||||
dialin_settings=None, # Not required for Twilio
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
)
|
||||
tts = ElevenLabsTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by saying 'Hello! Who dares dial me at this hour?!'.",
|
||||
},
|
||||
]
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by saying 'Hello! Who dares dial me at this hour?!'.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
tma_in,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
])
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
@transport.event_handler("on_dialin_ready")
|
||||
async def on_dialin_ready(transport, cdata):
|
||||
# For Twilio, Telnyx, etc. You need to update the state of the call
|
||||
# and forward it to the sip_uri..
|
||||
print(f"Forwarding call: {callId} {sipUri}")
|
||||
@transport.event_handler("on_dialin_ready")
|
||||
async def on_dialin_ready(transport, cdata):
|
||||
# For Twilio, Telnyx, etc. You need to update the state of the call
|
||||
# and forward it to the sip_uri..
|
||||
print(f"Forwarding call: {callId} {sipUri}")
|
||||
|
||||
try:
|
||||
# The TwiML is updated using Twilio's client library
|
||||
call = twilioclient.calls(callId).update(
|
||||
twiml=f'<Response><Dial><Sip>{sipUri}</Sip></Dial></Response>'
|
||||
)
|
||||
except Exception as e:
|
||||
raise Exception(f"Failed to forward call: {str(e)}")
|
||||
try:
|
||||
# The TwiML is updated using Twilio's client library
|
||||
call = twilioclient.calls(callId).update(
|
||||
twiml=f"<Response><Dial><Sip>{sipUri}</Sip></Dial></Response>"
|
||||
)
|
||||
except Exception as e:
|
||||
raise Exception(f"Failed to forward call: {str(e)}")
|
||||
|
||||
runner = PipelineRunner()
|
||||
await runner.run(task)
|
||||
runner = PipelineRunner()
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
pipecat-ai[daily,openai,silero]
|
||||
pipecat-ai[daily,elevenlabs,openai,silero]
|
||||
fastapi
|
||||
uvicorn
|
||||
requests
|
||||
python-dotenv
|
||||
loguru
|
||||
twilio
|
||||
twilio
|
||||
python-multipart
|
||||
|
||||
@@ -13,7 +13,7 @@ from pipecat.frames.frames import EndFrame, TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
@@ -21,21 +21,24 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Say One Thing", DailyParams(audio_out_enabled=True))
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Say One Thing", DailyParams(audio_out_enabled=True)
|
||||
)
|
||||
|
||||
tts = CartesiaHttpTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -46,11 +49,11 @@ async def main(room_url):
|
||||
# participant joins.
|
||||
@transport.event_handler("on_participant_joined")
|
||||
async def on_new_participant_joined(transport, participant):
|
||||
participant_name = participant["info"]["userName"] or ''
|
||||
participant_name = participant["info"]["userName"] or ""
|
||||
await task.queue_frames([TextFrame(f"Hello there, {participant_name}!"), EndFrame()])
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -9,17 +9,18 @@ import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import EndFrame, TextFrame
|
||||
from pipecat.frames.frames import TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.audio import LocalAudioTransport
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -30,10 +31,9 @@ async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = LocalAudioTransport(TransportParams(audio_out_enabled=True))
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
pipeline = Pipeline([tts, transport.output()])
|
||||
@@ -42,7 +42,7 @@ async def main():
|
||||
|
||||
async def say_something():
|
||||
await asyncio.sleep(1)
|
||||
await task.queue_frames([TextFrame("Hello there!"), EndFrame()])
|
||||
await task.queue_frame(TextFrame("Hello there!"))
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ from pipecat.frames.frames import EndFrame, LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
@@ -22,35 +22,34 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
"Say One Thing From an LLM",
|
||||
DailyParams(audio_out_enabled=True))
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Say One Thing From an LLM", DailyParams(audio_out_enabled=True)
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
tts = CartesiaHttpTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are an LLM in a WebRTC session, and this is a 'hello world' demo. Say hello to the world.",
|
||||
}]
|
||||
}
|
||||
]
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
@@ -64,5 +63,4 @@ async def main(room_url):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -21,29 +21,26 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
"Show a still frame image",
|
||||
DailyParams(
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=1024
|
||||
)
|
||||
DailyParams(camera_out_enabled=True, camera_out_width=1024, camera_out_height=1024),
|
||||
)
|
||||
|
||||
imagegen = FalImageGenService(
|
||||
params=FalImageGenService.InputParams(
|
||||
image_size="square_hd"
|
||||
),
|
||||
params=FalImageGenService.InputParams(image_size="square_hd"),
|
||||
aiohttp_session=session,
|
||||
key=os.getenv("FAL_KEY"),
|
||||
)
|
||||
@@ -64,5 +61,4 @@ async def main(room_url):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -22,6 +22,7 @@ from pipecat.transports.local.tk import TkLocalTransport
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -35,15 +36,11 @@ async def main():
|
||||
|
||||
transport = TkLocalTransport(
|
||||
tk_root,
|
||||
TransportParams(
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=1024))
|
||||
TransportParams(camera_out_enabled=True, camera_out_width=1024, camera_out_height=1024),
|
||||
)
|
||||
|
||||
imagegen = FalImageGenService(
|
||||
params=FalImageGenService.InputParams(
|
||||
image_size="square_hd"
|
||||
),
|
||||
params=FalImageGenService.InputParams(image_size="square_hd"),
|
||||
aiohttp_session=session,
|
||||
key=os.getenv("FAL_KEY"),
|
||||
)
|
||||
@@ -56,7 +53,7 @@ async def main():
|
||||
runner = PipelineRunner()
|
||||
|
||||
async def run_tk():
|
||||
while runner.is_active():
|
||||
while not task.has_finished():
|
||||
tk_root.update()
|
||||
tk_root.update_idletasks()
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
@@ -4,6 +4,10 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
#
|
||||
# This example broken on latest pipecat and needs updating.
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
@@ -24,14 +28,17 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(room_url, None, "Static And Dynamic Speech")
|
||||
|
||||
meeting = TransportServiceOutput(transport, mic_enabled=True)
|
||||
@@ -52,8 +59,7 @@ async def main(room_url: str):
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
)
|
||||
|
||||
messages = [{"role": "system",
|
||||
"content": "tell the user a joke about llamas"}]
|
||||
messages = [{"role": "system", "content": "tell the user a joke about llamas"}]
|
||||
|
||||
# Start a task to run the LLM to create a joke, and convert the LLM
|
||||
# output to audio frames. This task will run in parallel with generating
|
||||
@@ -71,8 +77,7 @@ async def main(room_url: str):
|
||||
]
|
||||
)
|
||||
|
||||
merge_pipeline = SequentialMergePipeline(
|
||||
[simple_tts_pipeline, llm_pipeline])
|
||||
merge_pipeline = SequentialMergePipeline([simple_tts_pipeline, llm_pipeline])
|
||||
|
||||
await asyncio.gather(
|
||||
transport.run(merge_pipeline),
|
||||
@@ -82,5 +87,4 @@ async def main(room_url: str):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -13,23 +13,19 @@ from dataclasses import dataclass
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
AppFrame,
|
||||
EndFrame,
|
||||
Frame,
|
||||
ImageRawFrame,
|
||||
LLMFullResponseStartFrame,
|
||||
LLMMessagesFrame,
|
||||
TextFrame
|
||||
TextFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.pipeline.parallel_task import ParallelTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.aggregators.gated import GatedAggregator
|
||||
from pipecat.processors.aggregators.llm_response import LLMFullResponseAggregator
|
||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
@@ -38,6 +34,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -73,8 +70,10 @@ class MonthPrepender(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
@@ -83,48 +82,44 @@ async def main(room_url):
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=1024
|
||||
)
|
||||
camera_out_height=1024,
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaHttpTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
imagegen = FalImageGenService(
|
||||
params=FalImageGenService.InputParams(
|
||||
image_size="square_hd"
|
||||
),
|
||||
params=FalImageGenService.InputParams(image_size="square_hd"),
|
||||
aiohttp_session=session,
|
||||
key=os.getenv("FAL_KEY"),
|
||||
)
|
||||
|
||||
gated_aggregator = GatedAggregator(
|
||||
gate_open_fn=lambda frame: isinstance(frame, ImageRawFrame),
|
||||
gate_close_fn=lambda frame: isinstance(frame, LLMFullResponseStartFrame),
|
||||
start_open=False
|
||||
)
|
||||
|
||||
sentence_aggregator = SentenceAggregator()
|
||||
month_prepender = MonthPrepender()
|
||||
llm_full_response_aggregator = LLMFullResponseAggregator()
|
||||
|
||||
pipeline = Pipeline([
|
||||
llm, # LLM
|
||||
sentence_aggregator, # Aggregates LLM output into full sentences
|
||||
ParallelTask( # Run pipelines in parallel aggregating the result
|
||||
[month_prepender, tts], # Create "Month: sentence" and output audio
|
||||
[llm_full_response_aggregator, imagegen] # Aggregate full LLM response
|
||||
),
|
||||
gated_aggregator, # Queues everything until an image is available
|
||||
transport.output() # Transport output
|
||||
])
|
||||
# With `SyncParallelPipeline` we synchronize audio and images by pushing
|
||||
# them basically in order (e.g. I1 A1 A1 A1 I2 A2 A2 A2 A2 I3 A3). To do
|
||||
# that, each pipeline runs concurrently and `SyncParallelPipeline` will
|
||||
# wait for the input frame to be processed.
|
||||
#
|
||||
# Note that `SyncParallelPipeline` requires all processors in it to be
|
||||
# synchronous (which is the default for most processors).
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
llm, # LLM
|
||||
sentence_aggregator, # Aggregates LLM output into full sentences
|
||||
SyncParallelPipeline( # Run pipelines in parallel aggregating the result
|
||||
[month_prepender, tts], # Create "Month: sentence" and output audio
|
||||
[imagegen], # Generate image
|
||||
),
|
||||
transport.output(), # Transport output
|
||||
]
|
||||
)
|
||||
|
||||
frames = []
|
||||
for month in [
|
||||
@@ -150,8 +145,6 @@ async def main(room_url):
|
||||
frames.append(MonthFrame(month=month))
|
||||
frames.append(LLMMessagesFrame(messages))
|
||||
|
||||
frames.append(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
@@ -162,5 +155,4 @@ async def main(room_url):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -11,18 +11,25 @@ import sys
|
||||
|
||||
import tkinter as tk
|
||||
|
||||
from pipecat.frames.frames import AudioRawFrame, Frame, URLImageRawFrame, LLMMessagesFrame, TextFrame
|
||||
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
OutputAudioRawFrame,
|
||||
TTSAudioRawFrame,
|
||||
URLImageRawFrame,
|
||||
LLMMessagesFrame,
|
||||
TextFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import LLMFullResponseAggregator
|
||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.tk import TkLocalTransport
|
||||
from pipecat.transports.local.tk import TkLocalTransport, TkOutputTransport
|
||||
|
||||
from loguru import logger
|
||||
|
||||
@@ -42,7 +49,12 @@ async def main():
|
||||
runner = PipelineRunner()
|
||||
|
||||
async def get_month_data(month):
|
||||
messages = [{"role": "system", "content": f"Describe a nature photograph suitable for use in a calendar, for the month of {month}. Include only the image description with no preamble. Limit the description to one sentence, please.", }]
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": f"Describe a nature photograph suitable for use in a calendar, for the month of {month}. Include only the image description with no preamble. Limit the description to one sentence, please.",
|
||||
}
|
||||
]
|
||||
|
||||
class ImageDescription(FrameProcessor):
|
||||
def __init__(self):
|
||||
@@ -60,14 +72,16 @@ async def main():
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.audio = bytearray()
|
||||
self.frame = None
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, AudioRawFrame):
|
||||
if isinstance(frame, TTSAudioRawFrame):
|
||||
self.audio.extend(frame.audio)
|
||||
self.frame = AudioRawFrame(
|
||||
bytes(self.audio), frame.sample_rate, frame.num_channels)
|
||||
self.frame = OutputAudioRawFrame(
|
||||
bytes(self.audio), frame.sample_rate, frame.num_channels
|
||||
)
|
||||
|
||||
class ImageGrabber(FrameProcessor):
|
||||
def __init__(self):
|
||||
@@ -80,23 +94,20 @@ async def main():
|
||||
if isinstance(frame, URLImageRawFrame):
|
||||
self.frame = frame
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"))
|
||||
tts = CartesiaHttpTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
imagegen = FalImageGenService(
|
||||
params=FalImageGenService.InputParams(
|
||||
image_size="square_hd"
|
||||
),
|
||||
params=FalImageGenService.InputParams(image_size="square_hd"),
|
||||
aiohttp_session=session,
|
||||
key=os.getenv("FAL_KEY"))
|
||||
key=os.getenv("FAL_KEY"),
|
||||
)
|
||||
|
||||
aggregator = LLMFullResponseAggregator()
|
||||
sentence_aggregator = SentenceAggregator()
|
||||
|
||||
description = ImageDescription()
|
||||
|
||||
@@ -104,13 +115,25 @@ async def main():
|
||||
|
||||
image_grabber = ImageGrabber()
|
||||
|
||||
pipeline = Pipeline([
|
||||
llm,
|
||||
aggregator,
|
||||
description,
|
||||
ParallelPipeline([tts, audio_grabber],
|
||||
[imagegen, image_grabber])
|
||||
])
|
||||
# With `SyncParallelPipeline` we synchronize audio and images by
|
||||
# pushing them basically in order (e.g. I1 A1 A1 A1 I2 A2 A2 A2 A2
|
||||
# I3 A3). To do that, each pipeline runs concurrently and
|
||||
# `SyncParallelPipeline` will wait for the input frame to be
|
||||
# processed.
|
||||
#
|
||||
# Note that `SyncParallelPipeline` requires all processors in it to
|
||||
# be synchronous (which is the default for most processors).
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
llm, # LLM
|
||||
sentence_aggregator, # Aggregates LLM output into full sentences
|
||||
description, # Store sentence
|
||||
SyncParallelPipeline(
|
||||
[tts, audio_grabber], # Generate and store audio for the given sentence
|
||||
[imagegen, image_grabber], # Generate and storeimage for the given sentence
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
await task.queue_frame(LLMMessagesFrame(messages))
|
||||
@@ -131,20 +154,19 @@ async def main():
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=1024))
|
||||
camera_out_height=1024,
|
||||
),
|
||||
)
|
||||
|
||||
pipeline = Pipeline([transport.output()])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
# We only specify 5 months as we create tasks all at once and we might
|
||||
# get rate limited otherwise.
|
||||
# We only specify a few months as we create tasks all at once and we
|
||||
# might get rate limited otherwise.
|
||||
months: list[str] = [
|
||||
"January",
|
||||
"February",
|
||||
# "March",
|
||||
# "April",
|
||||
# "May",
|
||||
]
|
||||
|
||||
# We create one task per month. This will be executed concurrently.
|
||||
|
||||
@@ -9,16 +9,22 @@ import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.frames.frames import Frame, LLMMessagesFrame, MetricsFrame
|
||||
from pipecat.metrics.metrics import (
|
||||
TTFBMetricsData,
|
||||
ProcessingMetricsData,
|
||||
LLMUsageMetricsData,
|
||||
TTSUsageMetricsData,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.logger import FrameLogger
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -28,14 +34,37 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
class MetricsLogger(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
if isinstance(frame, MetricsFrame):
|
||||
for d in frame.data:
|
||||
if isinstance(d, TTFBMetricsData):
|
||||
print(f"!!! MetricsFrame: {frame}, ttfb: {d.value}")
|
||||
elif isinstance(d, ProcessingMetricsData):
|
||||
print(f"!!! MetricsFrame: {frame}, processing: {d.value}")
|
||||
elif isinstance(d, LLMUsageMetricsData):
|
||||
tokens = d.value
|
||||
print(
|
||||
f"!!! MetricsFrame: {frame}, tokens: {
|
||||
tokens.prompt_tokens}, characters: {
|
||||
tokens.completion_tokens}"
|
||||
)
|
||||
elif isinstance(d, TTSUsageMetricsData):
|
||||
print(f"!!! MetricsFrame: {frame}, characters: {d.value}")
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -44,23 +73,18 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
fl = FrameLogger("!!! after LLM", "red")
|
||||
fltts = FrameLogger("@@@ out of tts", "green")
|
||||
flend = FrameLogger("### out of the end", "magenta")
|
||||
ml = MetricsLogger()
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -71,17 +95,17 @@ async def main(room_url: str, token):
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(),
|
||||
tma_in,
|
||||
llm,
|
||||
fl,
|
||||
tts,
|
||||
fltts,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
flend
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
tma_in,
|
||||
llm,
|
||||
tts,
|
||||
ml,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@@ -89,8 +113,7 @@ async def main(room_url: str, token):
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -99,5 +122,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -11,7 +11,7 @@ import sys
|
||||
|
||||
from PIL import Image
|
||||
|
||||
from pipecat.frames.frames import ImageRawFrame, Frame, SystemFrame, TextFrame
|
||||
from pipecat.frames.frames import Frame, OutputImageRawFrame, SystemFrame, TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
@@ -20,8 +20,8 @@ from pipecat.processors.aggregators.llm_response import (
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.transports.services.daily import DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
@@ -31,6 +31,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -51,39 +52,51 @@ class ImageSyncAggregator(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if not isinstance(frame, SystemFrame):
|
||||
await self.push_frame(ImageRawFrame(image=self._speaking_image_bytes, size=(1024, 1024), format=self._speaking_image_format))
|
||||
if not isinstance(frame, SystemFrame) and direction == FrameDirection.DOWNSTREAM:
|
||||
await self.push_frame(
|
||||
OutputImageRawFrame(
|
||||
image=self._speaking_image_bytes,
|
||||
size=(1024, 1024),
|
||||
format=self._speaking_image_format,
|
||||
)
|
||||
)
|
||||
await self.push_frame(frame)
|
||||
await self.push_frame(ImageRawFrame(image=self._waiting_image_bytes, size=(1024, 1024), format=self._waiting_image_format))
|
||||
await self.push_frame(
|
||||
OutputImageRawFrame(
|
||||
image=self._waiting_image_bytes,
|
||||
size=(1024, 1024),
|
||||
format=self._waiting_image_format,
|
||||
)
|
||||
)
|
||||
else:
|
||||
await self.push_frame(frame)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=1024,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaHttpTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -100,23 +113,25 @@ async def main(room_url: str, token):
|
||||
os.path.join(os.path.dirname(__file__), "assets", "waiting.png"),
|
||||
)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(),
|
||||
image_sync_aggregator,
|
||||
tma_in,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
tma_out
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
image_sync_aggregator,
|
||||
tma_in,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
participant_name = participant["info"]["userName"] or ''
|
||||
participant_name = participant["info"]["userName"] or ""
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([TextFrame(f"Hi, this is {participant_name}.")])
|
||||
await task.queue_frames([TextFrame(f"Hi there {participant_name}!")])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
@@ -124,5 +139,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -14,8 +14,10 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -25,14 +27,17 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -41,19 +46,16 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -65,27 +67,32 @@ async def main(room_url: str, token):
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
))
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -94,5 +101,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -14,8 +14,10 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -25,14 +27,17 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -41,19 +46,18 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = AnthropicLLMService(
|
||||
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
||||
model="claude-3-opus-20240229")
|
||||
api_key=os.getenv("ANTHROPIC_API_KEY"), model="claude-3-opus-20240229"
|
||||
)
|
||||
|
||||
# todo: think more about how to handle system prompts in a more general way. OpenAI,
|
||||
# Google, and Anthropic all have slightly different approaches to providing a system
|
||||
@@ -68,14 +72,16 @@ async def main(room_url: str, token):
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@@ -91,5 +97,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -15,9 +15,11 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.frameworks.langchain import LangchainProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
@@ -32,6 +34,7 @@ from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
@@ -47,8 +50,10 @@ def get_session_history(session_id: str) -> BaseChatMessageHistory:
|
||||
return message_store[session_id]
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -61,27 +66,29 @@ async def main(room_url: str, token):
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
prompt = ChatPromptTemplate.from_messages(
|
||||
[
|
||||
("system",
|
||||
"Be nice and helpful. Answer very briefly and without special characters like `#` or `*`. "
|
||||
"Your response will be synthesized to voice and those characters will create unnatural sounds.",
|
||||
),
|
||||
(
|
||||
"system",
|
||||
"Be nice and helpful. Answer very briefly and without special characters like `#` or `*`. "
|
||||
"Your response will be synthesized to voice and those characters will create unnatural sounds.",
|
||||
),
|
||||
MessagesPlaceholder("chat_history"),
|
||||
("human", "{input}"),
|
||||
])
|
||||
]
|
||||
)
|
||||
chain = prompt | ChatOpenAI(model="gpt-4o", temperature=0.7)
|
||||
history_chain = RunnableWithMessageHistory(
|
||||
chain,
|
||||
get_session_history,
|
||||
history_messages_key="chat_history",
|
||||
input_messages_key="input")
|
||||
input_messages_key="input",
|
||||
)
|
||||
lc = LangchainProcessor(history_chain)
|
||||
|
||||
tma_in = LLMUserResponseAggregator()
|
||||
@@ -89,12 +96,12 @@ async def main(room_url: str, token):
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
lc, # Langchain
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
lc, # Langchain
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
@@ -108,11 +115,7 @@ async def main(room_url: str, token):
|
||||
# the `LLMMessagesFrame` will be picked up by the LangchainProcessor using
|
||||
# only the content of the last message to inject it in the prompt defined
|
||||
# above. So no role is required here.
|
||||
messages = [(
|
||||
{
|
||||
"content": "Please briefly introduce yourself to the user."
|
||||
}
|
||||
)]
|
||||
messages = [({"content": "Please briefly introduce yourself to the user."})]
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -121,5 +124,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -14,7 +14,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
@@ -25,14 +27,17 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -41,21 +46,17 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True
|
||||
)
|
||||
vad_audio_passthrough=True,
|
||||
),
|
||||
)
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = DeepgramTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("DEEPGRAM_API_KEY"),
|
||||
voice="aura-helios-en"
|
||||
aiohttp_session=session, api_key=os.getenv("DEEPGRAM_API_KEY"), voice="aura-helios-en"
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -67,15 +68,17 @@ async def main(room_url: str, token):
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@@ -83,8 +86,7 @@ async def main(room_url: str, token):
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -93,5 +95,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
102
examples/foundational/07d-interruptible-elevenlabs.py
Normal file
102
examples/foundational/07d-interruptible-elevenlabs.py
Normal file
@@ -0,0 +1,102 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -4,8 +4,8 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
@@ -14,26 +14,30 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.playht import PlayHTTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.processors.logger import FrameLogger
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -43,8 +47,8 @@ async def main(room_url: str, token):
|
||||
audio_out_sample_rate=16000,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = PlayHTTTSService(
|
||||
@@ -53,9 +57,7 @@ async def main(room_url: str, token):
|
||||
voice_url="s3://voice-cloning-zero-shot/801a663f-efd0-4254-98d0-5c175514c3e8/jennifer/manifest.json",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -67,14 +69,16 @@ async def main(room_url: str, token):
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@@ -82,8 +86,7 @@ async def main(room_url: str, token):
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -92,5 +95,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
107
examples/foundational/07f-interruptible-azure.py
Normal file
107
examples/foundational/07f-interruptible-azure.py
Normal file
@@ -0,0 +1,107 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.azure import AzureLLMService, AzureSTTService, AzureTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=16000,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True,
|
||||
),
|
||||
)
|
||||
|
||||
stt = AzureSTTService(
|
||||
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
|
||||
region=os.getenv("AZURE_SPEECH_REGION"),
|
||||
)
|
||||
|
||||
tts = AzureTTSService(
|
||||
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
|
||||
region=os.getenv("AZURE_SPEECH_REGION"),
|
||||
)
|
||||
|
||||
llm = AzureLLMService(
|
||||
api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
|
||||
endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
|
||||
model=os.getenv("AZURE_CHATGPT_MODEL"),
|
||||
)
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -4,8 +4,8 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
@@ -14,7 +14,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.openai import OpenAITTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
@@ -25,14 +27,17 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -42,18 +47,13 @@ async def main(room_url: str, token):
|
||||
audio_out_sample_rate=24000,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = OpenAITTSService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
voice="alloy"
|
||||
)
|
||||
tts = OpenAITTSService(api_key=os.getenv("OPENAI_API_KEY"), voice="alloy")
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -65,14 +65,16 @@ async def main(room_url: str, token):
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@@ -80,8 +82,7 @@ async def main(room_url: str, token):
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -90,5 +91,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -17,7 +17,7 @@ from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openpipe import OpenPipeLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -28,14 +28,17 @@ from loguru import logger
|
||||
import time
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -44,14 +47,13 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
timestamp = int(time.time())
|
||||
@@ -59,9 +61,7 @@ async def main(room_url: str, token):
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
openpipe_api_key=os.getenv("OPENPIPE_API_KEY"),
|
||||
model="gpt-4o",
|
||||
tags={
|
||||
"conversation_id": f"pipecat-{timestamp}"
|
||||
}
|
||||
tags={"conversation_id": f"pipecat-{timestamp}"},
|
||||
)
|
||||
|
||||
messages = [
|
||||
@@ -73,14 +73,16 @@ async def main(room_url: str, token):
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@@ -88,8 +90,7 @@ async def main(room_url: str, token):
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -98,5 +99,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
99
examples/foundational/07i-interruptible-xtts.py
Normal file
99
examples/foundational/07i-interruptible-xtts.py
Normal file
@@ -0,0 +1,99 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.xtts import XTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = XTTSService(
|
||||
aiohttp_session=session,
|
||||
voice_id="Claribel Dervla",
|
||||
language="en",
|
||||
base_url="http://localhost:8000",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -14,48 +14,53 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.gladia import GladiaSTTService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=44100,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True,
|
||||
),
|
||||
)
|
||||
|
||||
stt = GladiaSTTService(
|
||||
api_key=os.getenv("GLADIA_API_KEY"),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_name="British Lady",
|
||||
output_format="pcm_44100"
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -67,14 +72,17 @@ async def main(room_url: str, token):
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@@ -82,8 +90,7 @@ async def main(room_url: str, token):
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -92,5 +99,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
@@ -4,8 +4,8 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
@@ -14,47 +14,46 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
from pipecat.services.azure import AzureTTSService
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.lmnt import LmntTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=16000,
|
||||
audio_out_sample_rate=24000,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = AzureTTSService(
|
||||
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
|
||||
region=os.getenv("AZURE_SPEECH_REGION"),
|
||||
)
|
||||
tts = LmntTTSService(api_key=os.getenv("LMNT_API_KEY"), voice_id="morgan")
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -66,14 +65,16 @@ async def main(room_url: str, token):
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@@ -81,8 +82,7 @@ async def main(room_url: str, token):
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -91,5 +91,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
107
examples/foundational/07l-interruptible-together.py
Normal file
107
examples/foundational/07l-interruptible-together.py
Normal file
@@ -0,0 +1,107 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.together import TogetherLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = TogetherLLMService(
|
||||
api_key=os.getenv("TOGETHER_API_KEY"),
|
||||
model=os.getenv("TOGETHER_MODEL"),
|
||||
params=TogetherLLMService.InputParams(
|
||||
temperature=1.0,
|
||||
top_p=0.9,
|
||||
top_k=40,
|
||||
extra={
|
||||
"frequency_penalty": 2.0,
|
||||
"presence_penalty": 0.0,
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -3,18 +3,19 @@ import aiohttp
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from pipecat.pipeline.aggregators import SentenceAggregator
|
||||
from pipecat.processors.aggregators import SentenceAggregator
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
|
||||
from pipecat.transports.daily_transport import DailyTransport
|
||||
from pipecat.services.azure_ai_services import AzureLLMService, AzureTTSService
|
||||
from pipecat.services.elevenlabs_ai_services import ElevenLabsTTSService
|
||||
from pipecat.services.fal_ai_services import FalImageGenService
|
||||
from pipecat.pipeline.frames import AudioFrame, EndFrame, ImageFrame, LLMMessagesFrame, TextFrame
|
||||
from pipecat.transports.services.daily import DailyTransport
|
||||
from pipecat.services.azure import AzureLLMService, AzureTTSService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.frames.frames import AudioFrame, EndFrame, ImageFrame, LLMMessagesFrame, TextFrame
|
||||
|
||||
from runner import configure
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s")
|
||||
@@ -22,8 +23,10 @@ logger = logging.getLogger("pipecat")
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
|
||||
async def main(room_url: str):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
@@ -51,9 +54,7 @@ async def main(room_url: str):
|
||||
voice_id="jBpfuIE2acCO8z3wKNLl",
|
||||
)
|
||||
dalle = FalImageGenService(
|
||||
params=FalImageGenService.InputParams(
|
||||
image_size="1024x1024"
|
||||
),
|
||||
params=FalImageGenService.InputParams(image_size="1024x1024"),
|
||||
aiohttp_session=session,
|
||||
key=os.getenv("FAL_KEY"),
|
||||
)
|
||||
@@ -73,13 +74,11 @@ async def main(room_url: str):
|
||||
|
||||
async def get_text_and_audio(messages) -> Tuple[str, bytearray]:
|
||||
"""This function streams text from the LLM and uses the TTS service to convert
|
||||
that text to speech as it's received. """
|
||||
that text to speech as it's received."""
|
||||
source_queue = asyncio.Queue()
|
||||
sink_queue = asyncio.Queue()
|
||||
sentence_aggregator = SentenceAggregator()
|
||||
pipeline = Pipeline(
|
||||
[llm, sentence_aggregator, tts1], source_queue, sink_queue
|
||||
)
|
||||
pipeline = Pipeline([llm, sentence_aggregator, tts1], source_queue, sink_queue)
|
||||
|
||||
await source_queue.put(LLMMessagesFrame(messages))
|
||||
await source_queue.put(EndFrame())
|
||||
@@ -144,5 +143,4 @@ async def main(room_url: str):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -4,12 +4,21 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
InputAudioRawFrame,
|
||||
InputImageRawFrame,
|
||||
OutputAudioRawFrame,
|
||||
OutputImageRawFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.transports.services.daily import DailyTransport, DailyParams
|
||||
|
||||
from runner import configure
|
||||
@@ -17,38 +26,63 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url, token):
|
||||
transport = DailyTransport(
|
||||
room_url, token, "Test",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_is_live=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720
|
||||
class MirrorProcessor(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, InputAudioRawFrame):
|
||||
await self.push_frame(
|
||||
OutputAudioRawFrame(
|
||||
audio=frame.audio,
|
||||
sample_rate=frame.sample_rate,
|
||||
num_channels=frame.num_channels,
|
||||
)
|
||||
)
|
||||
elif isinstance(frame, InputImageRawFrame):
|
||||
await self.push_frame(
|
||||
OutputImageRawFrame(image=frame.image, size=frame.size, format=frame.format)
|
||||
)
|
||||
else:
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Test",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_is_live=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720,
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_video(participant["id"])
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_video(participant["id"])
|
||||
|
||||
pipeline = Pipeline([transport.input(), transport.output()])
|
||||
pipeline = Pipeline([transport.input(), MirrorProcessor(), transport.output()])
|
||||
|
||||
runner = PipelineRunner()
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
await runner.run(task)
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -4,14 +4,23 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
import tkinter as tk
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
InputAudioRawFrame,
|
||||
InputImageRawFrame,
|
||||
OutputAudioRawFrame,
|
||||
OutputImageRawFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.tk import TkLocalTransport
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
@@ -21,46 +30,73 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url, token):
|
||||
tk_root = tk.Tk()
|
||||
tk_root.title("Local Mirror")
|
||||
class MirrorProcessor(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
daily_transport = DailyTransport(room_url, token, "Test", DailyParams(audio_in_enabled=True))
|
||||
if isinstance(frame, InputAudioRawFrame):
|
||||
await self.push_frame(
|
||||
OutputAudioRawFrame(
|
||||
audio=frame.audio,
|
||||
sample_rate=frame.sample_rate,
|
||||
num_channels=frame.num_channels,
|
||||
)
|
||||
)
|
||||
elif isinstance(frame, InputImageRawFrame):
|
||||
await self.push_frame(
|
||||
OutputImageRawFrame(image=frame.image, size=frame.size, format=frame.format)
|
||||
)
|
||||
else:
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
tk_transport = TkLocalTransport(
|
||||
tk_root,
|
||||
TransportParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_is_live=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720))
|
||||
|
||||
@daily_transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_video(participant["id"])
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
pipeline = Pipeline([daily_transport.input(), tk_transport.output()])
|
||||
tk_root = tk.Tk()
|
||||
tk_root.title("Local Mirror")
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
daily_transport = DailyTransport(
|
||||
room_url, token, "Test", DailyParams(audio_in_enabled=True)
|
||||
)
|
||||
|
||||
async def run_tk():
|
||||
while not task.has_finished():
|
||||
tk_root.update()
|
||||
tk_root.update_idletasks()
|
||||
await asyncio.sleep(0.1)
|
||||
tk_transport = TkLocalTransport(
|
||||
tk_root,
|
||||
TransportParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_is_live=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720,
|
||||
),
|
||||
)
|
||||
|
||||
runner = PipelineRunner()
|
||||
@daily_transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_video(participant["id"])
|
||||
|
||||
await asyncio.gather(runner.run(task), run_tk())
|
||||
pipeline = Pipeline([daily_transport.input(), MirrorProcessor(), tk_transport.output()])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
async def run_tk():
|
||||
while not task.has_finished():
|
||||
tk_root.update()
|
||||
tk_root.update_idletasks()
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await asyncio.gather(runner.run(task), run_tk())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -14,8 +14,10 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -25,15 +27,17 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -42,19 +46,16 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -67,15 +68,17 @@ async def main(room_url: str, token):
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
hey_robot_filter, # Filter out speech not directed at the robot
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
hey_robot_filter, # Filter out speech not directed at the robot
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@@ -90,5 +93,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -12,9 +12,9 @@ import wave
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
AudioRawFrame,
|
||||
LLMFullResponseEndFrame,
|
||||
LLMMessagesFrame,
|
||||
OutputAudioRawFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -25,7 +25,7 @@ from pipecat.processors.aggregators.llm_response import (
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.logger import FrameLogger
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -35,6 +35,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -53,12 +54,12 @@ for file in sound_files:
|
||||
filename = os.path.splitext(os.path.basename(full_path))[0]
|
||||
# Open the image and convert it to bytes
|
||||
with wave.open(full_path) as audio_file:
|
||||
sounds[file] = AudioRawFrame(audio_file.readframes(-1),
|
||||
audio_file.getframerate(), audio_file.getnchannels())
|
||||
sounds[file] = OutputAudioRawFrame(
|
||||
audio_file.readframes(-1), audio_file.getframerate(), audio_file.getnchannels()
|
||||
)
|
||||
|
||||
|
||||
class OutboundSoundEffectWrapper(FrameProcessor):
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
@@ -71,7 +72,6 @@ class OutboundSoundEffectWrapper(FrameProcessor):
|
||||
|
||||
|
||||
class InboundSoundEffectWrapper(FrameProcessor):
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
@@ -83,8 +83,10 @@ class InboundSoundEffectWrapper(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -93,18 +95,15 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id="ErXwobaYiN019PkySvjV",
|
||||
tts = CartesiaHttpTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
messages = [
|
||||
@@ -121,18 +120,20 @@ async def main(room_url: str, token):
|
||||
fl = FrameLogger("LLM Out")
|
||||
fl2 = FrameLogger("Transcription In")
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(),
|
||||
tma_in,
|
||||
in_sound,
|
||||
fl2,
|
||||
llm,
|
||||
fl,
|
||||
tts,
|
||||
out_sound,
|
||||
transport.output(),
|
||||
tma_out
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
tma_in,
|
||||
in_sound,
|
||||
fl2,
|
||||
llm,
|
||||
fl,
|
||||
tts,
|
||||
out_sound,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
]
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
@@ -148,5 +149,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -16,7 +16,7 @@ from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.moondream import MoondreamService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -26,6 +26,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -33,7 +34,6 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class UserImageRequester(FrameProcessor):
|
||||
|
||||
def __init__(self, participant_id: str | None = None):
|
||||
super().__init__()
|
||||
self._participant_id = participant_id
|
||||
@@ -45,12 +45,16 @@ class UserImageRequester(FrameProcessor):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if self._participant_id and isinstance(frame, TextFrame):
|
||||
await self.push_frame(UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM)
|
||||
await self.push_frame(
|
||||
UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
|
||||
)
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -59,14 +63,8 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
user_response = UserResponseAggregator()
|
||||
@@ -78,10 +76,9 @@ async def main(room_url: str, token):
|
||||
# If you run into weird description, try with use_cpu=True
|
||||
moondream = MoondreamService()
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
@@ -91,15 +88,17 @@ async def main(room_url: str, token):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
image_requester.set_participant_id(participant["id"])
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(),
|
||||
user_response,
|
||||
image_requester,
|
||||
vision_aggregator,
|
||||
moondream,
|
||||
tts,
|
||||
transport.output()
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
user_response,
|
||||
image_requester,
|
||||
vision_aggregator,
|
||||
moondream,
|
||||
tts,
|
||||
transport.output(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@@ -107,6 +106,6 @@ async def main(room_url: str, token):
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -16,7 +16,7 @@ from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.google import GoogleLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -26,6 +26,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -33,7 +34,6 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class UserImageRequester(FrameProcessor):
|
||||
|
||||
def __init__(self, participant_id: str | None = None):
|
||||
super().__init__()
|
||||
self._participant_id = participant_id
|
||||
@@ -45,12 +45,16 @@ class UserImageRequester(FrameProcessor):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if self._participant_id and isinstance(frame, TextFrame):
|
||||
await self.push_frame(UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM)
|
||||
await self.push_frame(
|
||||
UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
|
||||
)
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -60,8 +64,8 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
user_response = UserResponseAggregator()
|
||||
@@ -71,13 +75,12 @@ async def main(room_url: str, token):
|
||||
vision_aggregator = VisionImageFrameAggregator()
|
||||
|
||||
google = GoogleLLMService(
|
||||
model="gemini-1.5-flash-latest",
|
||||
api_key=os.getenv("GOOGLE_API_KEY"))
|
||||
model="gemini-1.5-flash-latest", api_key=os.getenv("GOOGLE_API_KEY")
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
@@ -87,15 +90,17 @@ async def main(room_url: str, token):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
image_requester.set_participant_id(participant["id"])
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(),
|
||||
user_response,
|
||||
image_requester,
|
||||
vision_aggregator,
|
||||
google,
|
||||
tts,
|
||||
transport.output()
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
user_response,
|
||||
image_requester,
|
||||
vision_aggregator,
|
||||
google,
|
||||
tts,
|
||||
transport.output(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@@ -103,6 +108,6 @@ async def main(room_url: str, token):
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -16,7 +16,7 @@ from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -26,6 +26,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -33,7 +34,6 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class UserImageRequester(FrameProcessor):
|
||||
|
||||
def __init__(self, participant_id: str | None = None):
|
||||
super().__init__()
|
||||
self._participant_id = participant_id
|
||||
@@ -45,12 +45,16 @@ class UserImageRequester(FrameProcessor):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if self._participant_id and isinstance(frame, TextFrame):
|
||||
await self.push_frame(UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM)
|
||||
await self.push_frame(
|
||||
UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
|
||||
)
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -59,8 +63,8 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
user_response = UserResponseAggregator()
|
||||
@@ -69,15 +73,11 @@ async def main(room_url: str, token):
|
||||
|
||||
vision_aggregator = VisionImageFrameAggregator()
|
||||
|
||||
openai = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o"
|
||||
)
|
||||
openai = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
@@ -87,15 +87,17 @@ async def main(room_url: str, token):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
image_requester.set_participant_id(participant["id"])
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(),
|
||||
user_response,
|
||||
image_requester,
|
||||
vision_aggregator,
|
||||
openai,
|
||||
tts,
|
||||
transport.output()
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
user_response,
|
||||
image_requester,
|
||||
vision_aggregator,
|
||||
openai,
|
||||
tts,
|
||||
transport.output(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@@ -103,6 +105,6 @@ async def main(room_url: str, token):
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -16,7 +16,7 @@ from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -26,6 +26,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -33,7 +34,6 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class UserImageRequester(FrameProcessor):
|
||||
|
||||
def __init__(self, participant_id: str | None = None):
|
||||
super().__init__()
|
||||
self._participant_id = participant_id
|
||||
@@ -45,12 +45,16 @@ class UserImageRequester(FrameProcessor):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if self._participant_id and isinstance(frame, TextFrame):
|
||||
await self.push_frame(UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM)
|
||||
await self.push_frame(
|
||||
UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
|
||||
)
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -59,8 +63,8 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
user_response = UserResponseAggregator()
|
||||
@@ -69,15 +73,12 @@ async def main(room_url: str, token):
|
||||
|
||||
vision_aggregator = VisionImageFrameAggregator()
|
||||
|
||||
anthropic = AnthropicLLMService(
|
||||
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
||||
model="claude-3-sonnet-20240229"
|
||||
)
|
||||
anthropic = AnthropicLLMService(api_key=os.getenv("ANTHROPIC_API_KEY"))
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
sample_rate=16000,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
@@ -87,15 +88,17 @@ async def main(room_url: str, token):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
image_requester.set_participant_id(participant["id"])
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(),
|
||||
user_response,
|
||||
image_requester,
|
||||
vision_aggregator,
|
||||
anthropic,
|
||||
tts,
|
||||
transport.output()
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
user_response,
|
||||
image_requester,
|
||||
vision_aggregator,
|
||||
anthropic,
|
||||
tts,
|
||||
transport.output(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@@ -103,6 +106,6 @@ async def main(room_url: str, token):
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
@@ -20,6 +21,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -27,7 +29,6 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class TranscriptionLogger(FrameProcessor):
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
@@ -35,23 +36,26 @@ class TranscriptionLogger(FrameProcessor):
|
||||
print(f"Transcription: {frame.text}")
|
||||
|
||||
|
||||
async def main(room_url: str):
|
||||
transport = DailyTransport(room_url, None, "Transcription bot",
|
||||
DailyParams(audio_in_enabled=True))
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
stt = WhisperSTTService()
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Transcription bot", DailyParams(audio_in_enabled=True)
|
||||
)
|
||||
|
||||
tl = TranscriptionLogger()
|
||||
stt = WhisperSTTService()
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
tl = TranscriptionLogger()
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
runner = PipelineRunner()
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
await runner.run(task)
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -19,6 +19,7 @@ from pipecat.transports.local.audio import LocalAudioTransport
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -26,7 +27,6 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class TranscriptionLogger(FrameProcessor):
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
@@ -21,6 +22,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -28,7 +30,6 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class TranscriptionLogger(FrameProcessor):
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
@@ -36,23 +37,26 @@ class TranscriptionLogger(FrameProcessor):
|
||||
print(f"Transcription: {frame.text}")
|
||||
|
||||
|
||||
async def main(room_url: str):
|
||||
transport = DailyTransport(room_url, None, "Transcription bot",
|
||||
DailyParams(audio_in_enabled=True))
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
stt = DeepgramSTTService(os.getenv("DEEPGRAM_API_KEY"))
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Transcription bot", DailyParams(audio_in_enabled=True)
|
||||
)
|
||||
|
||||
tl = TranscriptionLogger()
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
tl = TranscriptionLogger()
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
runner = PipelineRunner()
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
await runner.run(task)
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -13,12 +13,8 @@ from pipecat.frames.frames import TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantContextAggregator,
|
||||
LLMUserContextAggregator,
|
||||
)
|
||||
from pipecat.processors.logger import FrameLogger
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMContext, OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -30,22 +26,25 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def start_fetch_weather(llm):
|
||||
await llm.push_frame(TextFrame("Let me think."))
|
||||
async def start_fetch_weather(function_name, llm, context):
|
||||
await llm.push_frame(TextFrame("Let me check on that."))
|
||||
|
||||
|
||||
async def fetch_weather_from_api(llm, args):
|
||||
return {"conditions": "nice", "temperature": "75"}
|
||||
async def fetch_weather_from_api(function_name, tool_call_id, args, llm, context, result_callback):
|
||||
await result_callback({"conditions": "nice", "temperature": "75"})
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -54,23 +53,19 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm.register_function(
|
||||
"get_current_weather",
|
||||
fetch_weather_from_api,
|
||||
start_callback=start_fetch_weather)
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
# Register a function_name of None to get all functions
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
||||
|
||||
fl_in = FrameLogger("Inner")
|
||||
fl_out = FrameLogger("Outer")
|
||||
@@ -90,17 +85,15 @@ async def main(room_url: str, token):
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"celsius",
|
||||
"fahrenheit"],
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the users location.",
|
||||
},
|
||||
},
|
||||
"required": [
|
||||
"location",
|
||||
"format"],
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
})]
|
||||
},
|
||||
)
|
||||
]
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -109,22 +102,24 @@ async def main(room_url: str, token):
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
tma_in = LLMUserContextAggregator(context)
|
||||
tma_out = LLMAssistantContextAggregator(context)
|
||||
pipeline = Pipeline([
|
||||
fl_in,
|
||||
transport.input(),
|
||||
tma_in,
|
||||
llm,
|
||||
fl_out,
|
||||
tts,
|
||||
transport.output(),
|
||||
tma_out
|
||||
])
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
fl_in,
|
||||
transport.input(),
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
fl_out,
|
||||
tts,
|
||||
transport.output(),
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@ transport.event_handler("on_first_participant_joined")
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
@@ -136,5 +131,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
@@ -14,10 +14,6 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantContextAggregator,
|
||||
LLMUserContextAggregator
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.filters.function_filter import FunctionFilter
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
@@ -32,6 +28,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -40,10 +37,14 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
current_voice = "News Lady"
|
||||
|
||||
|
||||
async def switch_voice(llm, args):
|
||||
async def switch_voice(function_name, tool_call_id, args, llm, context, result_callback):
|
||||
global current_voice
|
||||
current_voice = args["voice"]
|
||||
return {"voice": f"You are now using your {current_voice} voice. Your responses should now be as if you were a {current_voice}."}
|
||||
await result_callback(
|
||||
{
|
||||
"voice": f"You are now using your {current_voice} voice. Your responses should now be as if you were a {current_voice}."
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
async def news_lady_filter(frame) -> bool:
|
||||
@@ -58,42 +59,38 @@ async def barbershop_man_filter(frame) -> bool:
|
||||
return current_voice == "Barbershop Man"
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Pipecat",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=44100,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
news_lady = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_name="Newslady",
|
||||
output_format="pcm_44100"
|
||||
voice_id="bf991597-6c13-47e4-8411-91ec2de5c466", # Newslady
|
||||
)
|
||||
|
||||
british_lady = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_name="British Lady",
|
||||
output_format="pcm_44100"
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
barbershop_man = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_name="Barbershop Man",
|
||||
output_format="pcm_44100"
|
||||
voice_id="a0e99841-438c-4a64-b679-ae501e7d6091", # Barbershop Man
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
llm.register_function("switch_voice", switch_voice)
|
||||
|
||||
tools = [
|
||||
@@ -112,7 +109,9 @@ async def main(room_url: str, token):
|
||||
},
|
||||
"required": ["voice"],
|
||||
},
|
||||
})]
|
||||
},
|
||||
)
|
||||
]
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -121,21 +120,22 @@ async def main(room_url: str, token):
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
tma_in = LLMUserContextAggregator(context)
|
||||
tma_out = LLMAssistantContextAggregator(context)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
ParallelPipeline( # TTS (one of the following vocies)
|
||||
[FunctionFilter(news_lady_filter), news_lady], # News Lady voice
|
||||
[FunctionFilter(british_lady_filter), british_lady], # British Lady voice
|
||||
[FunctionFilter(barbershop_man_filter), barbershop_man], # Barbershop Man voice
|
||||
),
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
ParallelPipeline( # TTS (one of the following vocies)
|
||||
[FunctionFilter(news_lady_filter), news_lady], # News Lady voice
|
||||
[FunctionFilter(british_lady_filter), british_lady], # British Lady voice
|
||||
[FunctionFilter(barbershop_man_filter), barbershop_man], # Barbershop Man voice
|
||||
),
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@@ -146,7 +146,9 @@ async def main(room_url: str, token):
|
||||
messages.append(
|
||||
{
|
||||
"role": "system",
|
||||
"content": f"Please introduce yourself to the user and let them know the voices you can do. Your initial responses should be as if you were a {current_voice}."})
|
||||
"content": f"Please introduce yourself to the user and let them know the voices you can do. Your initial responses should be as if you were a {current_voice}.",
|
||||
}
|
||||
)
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -155,5 +157,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -14,13 +14,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantContextAggregator,
|
||||
LLMUserContextAggregator
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.filters.function_filter import FunctionFilter
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.whisper import Model, WhisperSTTService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
@@ -33,6 +29,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -41,10 +38,10 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
current_language = "English"
|
||||
|
||||
|
||||
async def switch_language(llm, args):
|
||||
async def switch_language(function_name, tool_call_id, args, llm, context, result_callback):
|
||||
global current_language
|
||||
current_language = args["language"]
|
||||
return {"voice": f"Your answers from now on should be in {current_language}."}
|
||||
await result_callback({"voice": f"Your answers from now on should be in {current_language}."})
|
||||
|
||||
|
||||
async def english_filter(frame) -> bool:
|
||||
@@ -55,8 +52,10 @@ async def spanish_filter(frame) -> bool:
|
||||
return current_language == "Spanish"
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -66,28 +65,23 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True
|
||||
)
|
||||
vad_audio_passthrough=True,
|
||||
),
|
||||
)
|
||||
|
||||
stt = WhisperSTTService(model=Model.LARGE)
|
||||
|
||||
english_tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id="pNInz6obpgDQGcFmaJgB",
|
||||
english_tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
spanish_tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
model="eleven_multilingual_v2",
|
||||
voice_id="9F4C8ztpNUmXkdDDbz3J",
|
||||
spanish_tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="846d6cb0-2301-48b6-9683-48f5618ea2f6", # Spanish-speaking Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
llm.register_function("switch_language", switch_language)
|
||||
|
||||
tools = [
|
||||
@@ -106,7 +100,9 @@ async def main(room_url: str, token):
|
||||
},
|
||||
"required": ["language"],
|
||||
},
|
||||
})]
|
||||
},
|
||||
)
|
||||
]
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -115,21 +111,22 @@ async def main(room_url: str, token):
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
tma_in = LLMUserContextAggregator(context)
|
||||
tma_out = LLMAssistantContextAggregator(context)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
ParallelPipeline( # TTS (bot will speak the chosen language)
|
||||
[FunctionFilter(english_filter), english_tts], # English
|
||||
[FunctionFilter(spanish_filter), spanish_tts], # Spanish
|
||||
),
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
ParallelPipeline( # TTS (bot will speak the chosen language)
|
||||
[FunctionFilter(english_filter), english_tts], # English
|
||||
[FunctionFilter(spanish_filter), spanish_tts], # Spanish
|
||||
),
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@@ -140,7 +137,9 @@ async def main(room_url: str, token):
|
||||
messages.append(
|
||||
{
|
||||
"role": "system",
|
||||
"content": f"Please introduce yourself to the user and let them know the languages you speak. Your initial responses should be in {current_language}."})
|
||||
"content": f"Please introduce yourself to the user and let them know the languages you speak. Your initial responses should be in {current_language}.",
|
||||
}
|
||||
)
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -149,5 +148,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -8,17 +8,22 @@ import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.deepgram import DeepgramTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport, DailyTransportMessageFrame
|
||||
from pipecat.transports.services.daily import (
|
||||
DailyParams,
|
||||
DailyTransport,
|
||||
DailyTransportMessageFrame,
|
||||
)
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
@@ -26,14 +31,17 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -42,15 +50,15 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = DeepgramTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("DEEPGRAM_API_KEY"),
|
||||
voice="aura-asteria-en",
|
||||
base_url="http://0.0.0.0:8080/v1/speak"
|
||||
base_url="http://0.0.0.0:8080/v1/speak",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
@@ -59,7 +67,7 @@ async def main(room_url: str, token):
|
||||
# model="gpt-4o"
|
||||
# Or, to use a local vLLM (or similar) api server
|
||||
model="meta-llama/Meta-Llama-3-8B-Instruct",
|
||||
base_url="http://0.0.0.0:8000/v1"
|
||||
base_url="http://0.0.0.0:8000/v1",
|
||||
)
|
||||
|
||||
messages = [
|
||||
@@ -72,14 +80,16 @@ async def main(room_url: str, token):
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))
|
||||
|
||||
@@ -92,8 +102,7 @@ async def main(room_url: str, token):
|
||||
# When the first participant joins, the bot should introduce itself.
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
# Handle "latency-ping" messages. The client will send app messages that look like
|
||||
@@ -110,14 +119,18 @@ async def main(room_url: str, token):
|
||||
logger.debug(f"Received latency ping app message: {message}")
|
||||
ts = message["latency-ping"]["ts"]
|
||||
# Send immediately
|
||||
transport.output().send_message(DailyTransportMessageFrame(
|
||||
message={"latency-pong-msg-handler": {"ts": ts}},
|
||||
participant_id=sender))
|
||||
transport.output().send_message(
|
||||
DailyTransportMessageFrame(
|
||||
message={"latency-pong-msg-handler": {"ts": ts}}, participant_id=sender
|
||||
)
|
||||
)
|
||||
# And push to the pipeline for the Daily transport.output to send
|
||||
await tma_in.push_frame(
|
||||
DailyTransportMessageFrame(
|
||||
message={"latency-pong-pipeline-delivery": {"ts": ts}},
|
||||
participant_id=sender))
|
||||
participant_id=sender,
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug(f"message handling error: {e} - {message}")
|
||||
|
||||
@@ -126,5 +139,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
116
examples/foundational/17-detect-user-idle.py
Normal file
116
examples/foundational/17-detect-user-idle.py
Normal file
@@ -0,0 +1,116 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.user_idle_processor import UserIdleProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
async def user_idle_callback(user_idle: UserIdleProcessor):
|
||||
messages.append(
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Ask the user if they are still there and try to prompt for some input, but be short.",
|
||||
}
|
||||
)
|
||||
await user_idle.push_frame(LLMMessagesFrame(messages))
|
||||
|
||||
user_idle = UserIdleProcessor(callback=user_idle_callback, timeout=5.0)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
user_idle, # Idle user check-in
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
76
examples/foundational/18-gstreamer-filesrc.py
Normal file
76
examples/foundational/18-gstreamer-filesrc.py
Normal file
@@ -0,0 +1,76 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.gstreamer.pipeline_source import GStreamerPipelineSource
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure_with_args
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
parser.add_argument("-i", "--input", type=str, required=True, help="Input video file")
|
||||
|
||||
(room_url, _, args) = await configure_with_args(session, parser)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
"GStreamer",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_is_live=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720,
|
||||
camera_out_is_live=True,
|
||||
),
|
||||
)
|
||||
|
||||
gst = GStreamerPipelineSource(
|
||||
pipeline=f"filesrc location={args.input}",
|
||||
out_params=GStreamerPipelineSource.OutputParams(
|
||||
video_width=1280,
|
||||
video_height=720,
|
||||
audio_sample_rate=16000,
|
||||
audio_channels=1,
|
||||
),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
gst, # GStreamer file source
|
||||
transport.output(), # Transport bot output
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
67
examples/foundational/18a-gstreamer-videotestsrc.py
Normal file
67
examples/foundational/18a-gstreamer-videotestsrc.py
Normal file
@@ -0,0 +1,67 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import sys
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.gstreamer.pipeline_source import GStreamerPipelineSource
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
"GStreamer",
|
||||
DailyParams(
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720,
|
||||
camera_out_is_live=True,
|
||||
),
|
||||
)
|
||||
|
||||
gst = GStreamerPipelineSource(
|
||||
pipeline='videotestsrc ! capsfilter caps="video/x-raw,width=1280,height=720,framerate=30/1"',
|
||||
out_params=GStreamerPipelineSource.OutputParams(
|
||||
video_width=1280, video_height=720, clock_sync=False
|
||||
),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
gst, # GStreamer file source
|
||||
transport.output(), # Transport bot output
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
118
examples/foundational/19a-tools-anthropic.py
Normal file
118
examples/foundational/19a-tools-anthropic.py
Normal file
@@ -0,0 +1,118 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def get_weather(function_name, tool_call_id, arguments, llm, context, result_callback):
|
||||
location = arguments["location"]
|
||||
await result_callback(f"The weather in {location} is currently 72 degrees and sunny.")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = AnthropicLLMService(
|
||||
api_key=os.getenv("ANTHROPIC_API_KEY"), model="claude-3-5-sonnet-20240620"
|
||||
)
|
||||
llm.register_function("get_weather", get_weather)
|
||||
|
||||
tools = [
|
||||
{
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather in a given location",
|
||||
"input_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
}
|
||||
},
|
||||
"required": ["location"],
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
# todo: test with very short initial user message
|
||||
|
||||
# messages = [{"role": "system",
|
||||
# "content": "You are a helpful assistant who can report the weather in any location in the universe. Respond concisely. Your response will be turned into speech so use only simple words and punctuation."},
|
||||
# {"role": "user",
|
||||
# "content": " Start the conversation by introducing yourself."}]
|
||||
|
||||
messages = [{"role": "user", "content": "Say 'hello' to start the conversation."}]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
context_aggregator.user(), # User spoken responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses and tool context
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
173
examples/foundational/19b-tools-video-anthropic.py
Normal file
173
examples/foundational/19b-tools-video-anthropic.py
Normal file
@@ -0,0 +1,173 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
video_participant_id = None
|
||||
|
||||
|
||||
async def get_weather(function_name, tool_call_id, arguments, llm, context, result_callback):
|
||||
location = arguments["location"]
|
||||
await result_callback(f"The weather in {location} is currently 72 degrees and sunny.")
|
||||
|
||||
|
||||
async def get_image(function_name, tool_call_id, arguments, llm, context, result_callback):
|
||||
question = arguments["question"]
|
||||
await llm.request_image_frame(user_id=video_participant_id, text_content=question)
|
||||
|
||||
|
||||
async def main():
|
||||
global llm
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = AnthropicLLMService(
|
||||
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
||||
model="claude-3-5-sonnet-20240620",
|
||||
enable_prompt_caching_beta=True,
|
||||
)
|
||||
llm.register_function("get_weather", get_weather)
|
||||
llm.register_function("get_image", get_image)
|
||||
|
||||
tools = [
|
||||
{
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather in a given location",
|
||||
"input_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
}
|
||||
},
|
||||
"required": ["location"],
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "get_image",
|
||||
"description": "Get an image from the video stream.",
|
||||
"input_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"question": {
|
||||
"type": "string",
|
||||
"description": "The question that the user is asking about the image.",
|
||||
}
|
||||
},
|
||||
"required": ["question"],
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
# todo: test with very short initial user message
|
||||
|
||||
system_prompt = """\
|
||||
You are a helpful assistant who converses with a user and answers questions. Respond concisely to general questions.
|
||||
|
||||
Your response will be turned into speech so use only simple words and punctuation.
|
||||
|
||||
You have access to two tools: get_weather and get_image.
|
||||
|
||||
You can respond to questions about the weather using the get_weather tool.
|
||||
|
||||
You can answer questions about the user's video stream using the get_image tool. Some examples of phrases that \
|
||||
indicate you should use the get_image tool are:
|
||||
- What do you see?
|
||||
- What's in the video?
|
||||
- Can you describe the video?
|
||||
- Tell me about what you see.
|
||||
- Tell me something interesting about what you see.
|
||||
- What's happening in the video?
|
||||
|
||||
If you need to use a tool, simply use the tool. Do not tell the user the tool you are using. Be brief and concise.
|
||||
"""
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": system_prompt,
|
||||
}
|
||||
],
|
||||
},
|
||||
{"role": "user", "content": "Start the conversation by introducing yourself."},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
context_aggregator.user(), # User speech to text
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses and tool context
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
global video_participant_id
|
||||
video_participant_id = participant["id"]
|
||||
transport.capture_participant_transcription(video_participant_id)
|
||||
transport.capture_participant_video(video_participant_id, framerate=0)
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
runner = PipelineRunner()
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
137
examples/foundational/19c-tools-togetherai.py
Normal file
137
examples/foundational/19c-tools-togetherai.py
Normal file
@@ -0,0 +1,137 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.together import TogetherLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def get_current_weather(
|
||||
function_name, tool_call_id, arguments, llm, context, result_callback
|
||||
):
|
||||
logger.debug("IN get_current_weather")
|
||||
location = arguments["location"]
|
||||
await result_callback(f"The weather in {location} is currently 72 degrees and sunny.")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = TogetherLLMService(
|
||||
api_key=os.getenv("TOGETHER_API_KEY"),
|
||||
model=os.getenv("TOGETHER_MODEL"),
|
||||
)
|
||||
llm.register_function("get_current_weather", get_current_weather)
|
||||
|
||||
weatherTool = {
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather in a given location",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
},
|
||||
"required": ["location"],
|
||||
},
|
||||
}
|
||||
|
||||
system_prompt = f"""\
|
||||
You have access to the following functions:
|
||||
|
||||
Use the function '{weatherTool["name"]}' to '{weatherTool["description"]}':
|
||||
{json.dumps(weatherTool)}
|
||||
|
||||
If you choose to call a function ONLY reply in the following format with no prefix or suffix:
|
||||
|
||||
<function=example_function_name>{{\"example_name\": \"example_value\"}}</function>
|
||||
|
||||
Reminder:
|
||||
- Function calls MUST follow the specified format, start with <function= and end with </function>
|
||||
- Required parameters MUST be specified
|
||||
- Only call one function at a time
|
||||
- Put the entire function call reply on one line
|
||||
- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls
|
||||
|
||||
"""
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": "Wait for the user to say something."},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
context_aggregator.user(), # User speech to text
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses and tool context
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -1,18 +1,29 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
|
||||
|
||||
|
||||
def configure():
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
async def configure(aiohttp_session: aiohttp.ClientSession):
|
||||
(url, token, _) = await configure_with_args(aiohttp_session)
|
||||
return (url, token)
|
||||
|
||||
|
||||
async def configure_with_args(
|
||||
aiohttp_session: aiohttp.ClientSession, parser: argparse.ArgumentParser | None = None
|
||||
):
|
||||
if not parser:
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
parser.add_argument(
|
||||
"-u",
|
||||
"--url",
|
||||
type=str,
|
||||
required=False,
|
||||
help="URL of the Daily room to join")
|
||||
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-k",
|
||||
"--apikey",
|
||||
@@ -28,31 +39,24 @@ def configure():
|
||||
|
||||
if not url:
|
||||
raise Exception(
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL.")
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
|
||||
)
|
||||
|
||||
if not key:
|
||||
raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
raise Exception(
|
||||
"No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
|
||||
)
|
||||
|
||||
daily_rest_helper = DailyRESTHelper(
|
||||
daily_api_key=key,
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
|
||||
# Create a meeting token for the given room with an expiration 1 hour in
|
||||
# the future.
|
||||
room_name: str = urllib.parse.urlparse(url).path[1:]
|
||||
expiration: float = time.time() + 60 * 60
|
||||
expiry_time: float = 60 * 60
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://api.daily.co/v1/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True,
|
||||
"exp": expiration}},
|
||||
)
|
||||
token = await daily_rest_helper.get_token(url, expiry_time)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
|
||||
return (url, token)
|
||||
return (url, token, args)
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
@@ -7,10 +13,11 @@ from PIL import Image
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
ImageRawFrame,
|
||||
OutputImageRawFrame,
|
||||
SpriteFrame,
|
||||
Frame,
|
||||
LLMMessagesFrame,
|
||||
AudioRawFrame,
|
||||
TTSAudioRawFrame,
|
||||
TTSStoppedFrame,
|
||||
TextFrame,
|
||||
UserImageRawFrame,
|
||||
@@ -25,7 +32,7 @@ from pipecat.processors.aggregators.llm_response import LLMUserResponseAggregato
|
||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.moondream import MoondreamService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
@@ -36,6 +43,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -53,7 +61,7 @@ for i in range(1, 26):
|
||||
# Get the filename without the extension to use as the dictionary key
|
||||
# Open the image and convert it to bytes
|
||||
with Image.open(full_path) as img:
|
||||
sprites.append(ImageRawFrame(image=img.tobytes(), size=img.size, format=img.format))
|
||||
sprites.append(OutputImageRawFrame(image=img.tobytes(), size=img.size, format=img.format))
|
||||
|
||||
flipped = sprites[::-1]
|
||||
sprites.extend(flipped)
|
||||
@@ -76,7 +84,7 @@ class TalkingAnimation(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, AudioRawFrame):
|
||||
if isinstance(frame, TTSAudioRawFrame):
|
||||
if not self._is_talking:
|
||||
await self.push_frame(talking_frame)
|
||||
self._is_talking = True
|
||||
@@ -99,7 +107,9 @@ class UserImageRequester(FrameProcessor):
|
||||
|
||||
if self.participant_id and isinstance(frame, TextFrame):
|
||||
if frame.text == user_request_answer:
|
||||
await self.push_frame(UserImageRequestFrame(self.participant_id), FrameDirection.UPSTREAM)
|
||||
await self.push_frame(
|
||||
UserImageRequestFrame(self.participant_id), FrameDirection.UPSTREAM
|
||||
)
|
||||
await self.push_frame(TextFrame("Describe the image in a short sentence."))
|
||||
elif isinstance(frame, UserImageRawFrame):
|
||||
await self.push_frame(frame)
|
||||
@@ -128,8 +138,10 @@ class ImageFilterProcessor(FrameProcessor):
|
||||
await self.push_frame(frame)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -141,19 +153,16 @@ async def main(room_url: str, token):
|
||||
camera_out_height=576,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id="pNInz6obpgDQGcFmaJgB",
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
ta = TalkingAnimation()
|
||||
|
||||
@@ -176,17 +185,17 @@ async def main(room_url: str, token):
|
||||
|
||||
ura = LLMUserResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(),
|
||||
ura,
|
||||
llm,
|
||||
ParallelPipeline(
|
||||
[sa, ir, va, moondream],
|
||||
[tf, imgf]),
|
||||
tts,
|
||||
ta,
|
||||
transport.output()
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
ura,
|
||||
llm,
|
||||
ParallelPipeline([sa, ir, va, moondream], [tf, imgf]),
|
||||
tts,
|
||||
ta,
|
||||
transport.output(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
await task.queue_frame(quiet_frame)
|
||||
@@ -204,5 +213,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
python-dotenv
|
||||
requests
|
||||
fastapi[all]
|
||||
uvicorn
|
||||
pipecat-ai[daily,moondream,openai,silero]
|
||||
pipecat-ai[daily,cartesia,moondream,openai,silero]
|
||||
|
||||
@@ -1,18 +1,21 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
|
||||
|
||||
|
||||
def configure():
|
||||
async def configure(aiohttp_session: aiohttp.ClientSession):
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
parser.add_argument(
|
||||
"-u",
|
||||
"--url",
|
||||
type=str,
|
||||
required=False,
|
||||
help="URL of the Daily room to join")
|
||||
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-k",
|
||||
"--apikey",
|
||||
@@ -28,31 +31,24 @@ def configure():
|
||||
|
||||
if not url:
|
||||
raise Exception(
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL.")
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
|
||||
)
|
||||
|
||||
if not key:
|
||||
raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
raise Exception(
|
||||
"No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
|
||||
)
|
||||
|
||||
daily_rest_helper = DailyRESTHelper(
|
||||
daily_api_key=key,
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
|
||||
# Create a meeting token for the given room with an expiration 1 hour in
|
||||
# the future.
|
||||
room_name: str = urllib.parse.urlparse(url).path[1:]
|
||||
expiration: float = time.time() + 60 * 60
|
||||
expiry_time: float = 60 * 60
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://api.daily.co/v1/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True,
|
||||
"exp": expiration}},
|
||||
)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
token = await daily_rest_helper.get_token(url, expiry_time)
|
||||
|
||||
return (url, token)
|
||||
|
||||
@@ -1,31 +1,52 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
import atexit
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse, RedirectResponse
|
||||
|
||||
from utils.daily_helpers import create_room as _create_room, get_token
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
|
||||
|
||||
MAX_BOTS_PER_ROOM = 1
|
||||
|
||||
# Bot sub-process dict for status reporting and concurrency control
|
||||
bot_procs = {}
|
||||
|
||||
daily_helpers = {}
|
||||
|
||||
|
||||
def cleanup():
|
||||
# Clean up function, just to be extra safe
|
||||
for proc in bot_procs.values():
|
||||
for entry in bot_procs.values():
|
||||
proc = entry[0]
|
||||
proc.terminate()
|
||||
proc.wait()
|
||||
|
||||
|
||||
atexit.register(cleanup)
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
aiohttp_session = aiohttp.ClientSession()
|
||||
daily_helpers["rest"] = DailyRESTHelper(
|
||||
daily_api_key=os.getenv("DAILY_API_KEY", ""),
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
yield
|
||||
await aiohttp_session.close()
|
||||
cleanup()
|
||||
|
||||
|
||||
app = FastAPI()
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
@@ -39,45 +60,42 @@ app.add_middleware(
|
||||
@app.get("/start")
|
||||
async def start_agent(request: Request):
|
||||
print(f"!!! Creating room")
|
||||
room_url, room_name = _create_room()
|
||||
print(f"!!! Room URL: {room_url}")
|
||||
room = await daily_helpers["rest"].create_room(DailyRoomParams())
|
||||
print(f"!!! Room URL: {room.url}")
|
||||
# Ensure the room property is present
|
||||
if not room_url:
|
||||
if not room.url:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Missing 'room' property in request data. Cannot start agent without a target room!")
|
||||
detail="Missing 'room' property in request data. Cannot start agent without a target room!",
|
||||
)
|
||||
|
||||
# Check if there is already an existing process running in this room
|
||||
num_bots_in_room = sum(
|
||||
1 for proc in bot_procs.values() if proc[1] == room_url and proc[0].poll() is None)
|
||||
1 for proc in bot_procs.values() if proc[1] == room.url and proc[0].poll() is None
|
||||
)
|
||||
if num_bots_in_room >= MAX_BOTS_PER_ROOM:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Max bot limited reach for room: {room_url}")
|
||||
raise HTTPException(status_code=500, detail=f"Max bot limited reach for room: {room.url}")
|
||||
|
||||
# Get the token for the room
|
||||
token = get_token(room_url)
|
||||
token = await daily_helpers["rest"].get_token(room.url)
|
||||
|
||||
if not token:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to get token for room: {room_url}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")
|
||||
|
||||
# Spawn a new agent, and join the user session
|
||||
# Note: this is mostly for demonstration purposes (refer to 'deployment' in README)
|
||||
try:
|
||||
proc = subprocess.Popen(
|
||||
[
|
||||
f"python3 -m bot -u {room_url} -t {token}"
|
||||
],
|
||||
[f"python3 -m bot -u {room.url} -t {token}"],
|
||||
shell=True,
|
||||
bufsize=1,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__))
|
||||
cwd=os.path.dirname(os.path.abspath(__file__)),
|
||||
)
|
||||
bot_procs[proc.pid] = (proc, room_url)
|
||||
bot_procs[proc.pid] = (proc, room.url)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
|
||||
return RedirectResponse(room_url)
|
||||
return RedirectResponse(room.url)
|
||||
|
||||
|
||||
@app.get("/status/{pid}")
|
||||
@@ -87,8 +105,7 @@ def get_status(pid: int):
|
||||
|
||||
# If the subprocess doesn't exist, return an error
|
||||
if not proc:
|
||||
raise HTTPException(
|
||||
status_code=404, detail=f"Bot with process id: {pid} not found")
|
||||
raise HTTPException(status_code=404, detail=f"Bot with process id: {pid} not found")
|
||||
|
||||
# Check the status of the subprocess
|
||||
if proc[0].poll() is None:
|
||||
@@ -105,14 +122,10 @@ if __name__ == "__main__":
|
||||
default_host = os.getenv("HOST", "0.0.0.0")
|
||||
default_port = int(os.getenv("FAST_API_PORT", "7860"))
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Daily Moondream FastAPI server")
|
||||
parser.add_argument("--host", type=str,
|
||||
default=default_host, help="Host address")
|
||||
parser.add_argument("--port", type=int,
|
||||
default=default_port, help="Port number")
|
||||
parser.add_argument("--reload", action="store_true",
|
||||
help="Reload code on change")
|
||||
parser = argparse.ArgumentParser(description="Daily Moondream FastAPI server")
|
||||
parser.add_argument("--host", type=str, default=default_host, help="Host address")
|
||||
parser.add_argument("--port", type=int, default=default_port, help="Port number")
|
||||
parser.add_argument("--reload", action="store_true", help="Reload code on change")
|
||||
|
||||
config = parser.parse_args()
|
||||
|
||||
|
||||
@@ -1,109 +0,0 @@
|
||||
|
||||
import urllib.parse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
|
||||
daily_api_path = os.getenv("DAILY_API_URL") or "api.daily.co/v1"
|
||||
daily_api_key = os.getenv("DAILY_API_KEY")
|
||||
|
||||
|
||||
def create_room() -> tuple[str, str]:
|
||||
"""
|
||||
Helper function to create a Daily room.
|
||||
# See: https://docs.daily.co/reference/rest-api/rooms
|
||||
|
||||
Returns:
|
||||
tuple: A tuple containing the room URL and room name.
|
||||
|
||||
Raises:
|
||||
Exception: If the request to create the room fails or if the response does not contain the room URL or room name.
|
||||
"""
|
||||
room_props = {
|
||||
"exp": time.time() + 60 * 60, # 1 hour
|
||||
"enable_chat": True,
|
||||
"enable_emoji_reactions": True,
|
||||
"eject_at_room_exp": True,
|
||||
"enable_prejoin_ui": False, # Important for the bot to be able to join headlessly
|
||||
}
|
||||
res = requests.post(
|
||||
f"https://{daily_api_path}/rooms",
|
||||
headers={"Authorization": f"Bearer {daily_api_key}"},
|
||||
json={
|
||||
"properties": room_props
|
||||
},
|
||||
)
|
||||
if res.status_code != 200:
|
||||
raise Exception(f"Unable to create room: {res.text}")
|
||||
|
||||
data = res.json()
|
||||
room_url: str = data.get("url")
|
||||
room_name: str = data.get("name")
|
||||
if room_url is None or room_name is None:
|
||||
raise Exception("Missing room URL or room name in response")
|
||||
|
||||
return room_url, room_name
|
||||
|
||||
|
||||
def get_name_from_url(room_url: str) -> str:
|
||||
"""
|
||||
Extracts the name from a given room URL.
|
||||
|
||||
Args:
|
||||
room_url (str): The URL of the room.
|
||||
|
||||
Returns:
|
||||
str: The extracted name from the room URL.
|
||||
"""
|
||||
return urllib.parse.urlparse(room_url).path[1:]
|
||||
|
||||
|
||||
def get_token(room_url: str) -> str:
|
||||
"""
|
||||
Retrieves a meeting token for the specified Daily room URL.
|
||||
# See: https://docs.daily.co/reference/rest-api/meeting-tokens
|
||||
|
||||
Args:
|
||||
room_url (str): The URL of the Daily room.
|
||||
|
||||
Returns:
|
||||
str: The meeting token.
|
||||
|
||||
Raises:
|
||||
Exception: If no room URL is specified or if no Daily API key is specified.
|
||||
Exception: If there is an error creating the meeting token.
|
||||
"""
|
||||
if not room_url:
|
||||
raise Exception(
|
||||
"No Daily room specified. You must specify a Daily room in order a token to be generated.")
|
||||
|
||||
if not daily_api_key:
|
||||
raise Exception(
|
||||
"No Daily API key specified. set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
|
||||
expiration: float = time.time() + 60 * 60
|
||||
room_name = get_name_from_url(room_url)
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://{daily_api_path}/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {daily_api_key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True, # Owner tokens required for transcription
|
||||
"exp": expiration}},
|
||||
)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
|
||||
return token
|
||||
@@ -10,24 +10,14 @@ import os
|
||||
import sys
|
||||
import wave
|
||||
|
||||
from typing import List
|
||||
|
||||
from openai._types import NotGiven, NOT_GIVEN
|
||||
|
||||
from openai.types.chat import (
|
||||
ChatCompletionToolParam,
|
||||
)
|
||||
|
||||
from pipecat.frames.frames import AudioRawFrame
|
||||
from pipecat.frames.frames import OutputAudioRawFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import LLMUserContextAggregator, LLMAssistantContextAggregator
|
||||
from pipecat.processors.logger import FrameLogger
|
||||
from pipecat.processors.frame_processor import FrameDirection
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMContext, OpenAILLMContextFrame, OpenAILLMService
|
||||
from pipecat.services.ai_services import AIService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
@@ -36,6 +26,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -59,52 +50,46 @@ for file in sound_files:
|
||||
filename = os.path.splitext(os.path.basename(full_path))[0]
|
||||
# Open the sound and convert it to bytes
|
||||
with wave.open(full_path) as audio_file:
|
||||
sounds[file] = AudioRawFrame(audio_file.readframes(-1),
|
||||
audio_file.getframerate(), audio_file.getnchannels())
|
||||
sounds[file] = OutputAudioRawFrame(
|
||||
audio_file.readframes(-1), audio_file.getframerate(), audio_file.getnchannels()
|
||||
)
|
||||
|
||||
|
||||
class IntakeProcessor:
|
||||
def __init__(
|
||||
self,
|
||||
context: OpenAILLMContext,
|
||||
llm: AIService,
|
||||
tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
|
||||
*args,
|
||||
**kwargs,
|
||||
):
|
||||
super().__init__(*args, **kwargs)
|
||||
self._context: OpenAILLMContext = context
|
||||
self._llm = llm
|
||||
def __init__(self, context: OpenAILLMContext):
|
||||
print(f"Initializing context from IntakeProcessor")
|
||||
self._context.add_message({"role": "system", "content": "You are Jessica, an agent for a company called Tri-County Health Services. Your job is to collect important information from the user before their doctor visit. You're talking to Chad Bailey. You should address the user by their first name and be polite and professional. You're not a medical professional, so you shouldn't provide any advice. Keep your responses short. Your job is to collect information to give to a doctor. Don't make assumptions about what values to plug into functions. Ask for clarification if a user response is ambiguous. Start by introducing yourself. Then, ask the user to confirm their identity by telling you their birthday, including the year. When they answer with their birthday, call the verify_birthday function."})
|
||||
self._context.set_tools([
|
||||
context.add_message(
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "verify_birthday",
|
||||
"description": "Use this function to verify the user has provided their correct birthday.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"birthday": {
|
||||
"type": "string",
|
||||
"description": "The user's birthdate, including the year. The user can provide it in any format, but convert it to YYYY-MM-DD format to call this function.",
|
||||
}},
|
||||
"role": "system",
|
||||
"content": "You are Jessica, an agent for a company called Tri-County Health Services. Your job is to collect important information from the user before their doctor visit. You're talking to Chad Bailey. You should address the user by their first name and be polite and professional. You're not a medical professional, so you shouldn't provide any advice. Keep your responses short. Your job is to collect information to give to a doctor. Don't make assumptions about what values to plug into functions. Ask for clarification if a user response is ambiguous. Start by introducing yourself. Then, ask the user to confirm their identity by telling you their birthday, including the year. When they answer with their birthday, call the verify_birthday function.",
|
||||
}
|
||||
)
|
||||
context.set_tools(
|
||||
[
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "verify_birthday",
|
||||
"description": "Use this function to verify the user has provided their correct birthday.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"birthday": {
|
||||
"type": "string",
|
||||
"description": "The user's birthdate, including the year. The user can provide it in any format, but convert it to YYYY-MM-DD format to call this function.",
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}])
|
||||
# Create an allowlist of functions that the LLM can call
|
||||
self._functions = [
|
||||
"verify_birthday",
|
||||
"list_prescriptions",
|
||||
"list_allergies",
|
||||
"list_conditions",
|
||||
"list_visit_reasons",
|
||||
]
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
async def verify_birthday(self, llm, args):
|
||||
async def verify_birthday(
|
||||
self, function_name, tool_call_id, args, llm, context, result_callback
|
||||
):
|
||||
if args["birthday"] == "1983-01-01":
|
||||
self._context.set_tools(
|
||||
context.set_tools(
|
||||
[
|
||||
{
|
||||
"type": "function",
|
||||
@@ -129,23 +114,40 @@ class IntakeProcessor:
|
||||
},
|
||||
},
|
||||
},
|
||||
}},
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
}])
|
||||
}
|
||||
]
|
||||
)
|
||||
# It's a bit weird to push this to the LLM, but it gets it into the pipeline
|
||||
await llm.push_frame(sounds["ding2.wav"], FrameDirection.DOWNSTREAM)
|
||||
# await llm.push_frame(sounds["ding2.wav"], FrameDirection.DOWNSTREAM)
|
||||
# We don't need the function call in the context, so just return a new
|
||||
# system message and let the framework re-prompt
|
||||
return [{"role": "system", "content": "Next, thank the user for confirming their identity, then ask the user to list their current prescriptions. Each prescription needs to have a medication name and a dosage. Do not call the list_prescriptions function with any unknown dosages."}]
|
||||
await result_callback(
|
||||
[
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Next, thank the user for confirming their identity, then ask the user to list their current prescriptions. Each prescription needs to have a medication name and a dosage. Do not call the list_prescriptions function with any unknown dosages.",
|
||||
}
|
||||
]
|
||||
)
|
||||
else:
|
||||
# The user provided an incorrect birthday; ask them to try again
|
||||
return [{"role": "system", "content": "The user provided an incorrect birthday. Ask them for their birthday again. When they answer, call the verify_birthday function."}]
|
||||
await result_callback(
|
||||
[
|
||||
{
|
||||
"role": "system",
|
||||
"content": "The user provided an incorrect birthday. Ask them for their birthday again. When they answer, call the verify_birthday function.",
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
async def start_prescriptions(self, llm):
|
||||
async def start_prescriptions(self, function_name, llm, context):
|
||||
print(f"!!! doing start prescriptions")
|
||||
# Move on to allergies
|
||||
self._context.set_tools(
|
||||
context.set_tools(
|
||||
[
|
||||
{
|
||||
"type": "function",
|
||||
@@ -163,24 +165,30 @@ class IntakeProcessor:
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "What the user is allergic to",
|
||||
}},
|
||||
}
|
||||
},
|
||||
},
|
||||
}},
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
}])
|
||||
self._context.add_message(
|
||||
}
|
||||
]
|
||||
)
|
||||
context.add_message(
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Next, ask the user if they have any allergies. Once they have listed their allergies or confirmed they don't have any, call the list_allergies function."})
|
||||
"content": "Next, ask the user if they have any allergies. Once they have listed their allergies or confirmed they don't have any, call the list_allergies function.",
|
||||
}
|
||||
)
|
||||
print(f"!!! about to await llm process frame in start prescrpitions")
|
||||
await llm.process_frame(OpenAILLMContextFrame(self._context), FrameDirection.DOWNSTREAM)
|
||||
await llm.process_frame(OpenAILLMContextFrame(context), FrameDirection.DOWNSTREAM)
|
||||
print(f"!!! past await process frame in start prescriptions")
|
||||
|
||||
async def start_allergies(self, llm):
|
||||
async def start_allergies(self, function_name, llm, context):
|
||||
print("!!! doing start allergies")
|
||||
# Move on to conditions
|
||||
self._context.set_tools(
|
||||
context.set_tools(
|
||||
[
|
||||
{
|
||||
"type": "function",
|
||||
@@ -198,23 +206,28 @@ class IntakeProcessor:
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The user's medical condition",
|
||||
}},
|
||||
}
|
||||
},
|
||||
},
|
||||
}},
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
])
|
||||
self._context.add_message(
|
||||
]
|
||||
)
|
||||
context.add_message(
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Now ask the user if they have any medical conditions the doctor should know about. Once they've answered the question, call the list_conditions function."})
|
||||
await llm.process_frame(OpenAILLMContextFrame(self._context), FrameDirection.DOWNSTREAM)
|
||||
"content": "Now ask the user if they have any medical conditions the doctor should know about. Once they've answered the question, call the list_conditions function.",
|
||||
}
|
||||
)
|
||||
await llm.process_frame(OpenAILLMContextFrame(context), FrameDirection.DOWNSTREAM)
|
||||
|
||||
async def start_conditions(self, llm):
|
||||
async def start_conditions(self, function_name, llm, context):
|
||||
print("!!! doing start conditions")
|
||||
# Move on to visit reasons
|
||||
self._context.set_tools(
|
||||
context.set_tools(
|
||||
[
|
||||
{
|
||||
"type": "function",
|
||||
@@ -232,42 +245,50 @@ class IntakeProcessor:
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The user's reason for visiting the doctor",
|
||||
}},
|
||||
}
|
||||
},
|
||||
},
|
||||
}},
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
}])
|
||||
self._context.add_message(
|
||||
{"role": "system", "content": "Finally, ask the user the reason for their doctor visit today. Once they answer, call the list_visit_reasons function."})
|
||||
await llm.process_frame(OpenAILLMContextFrame(self._context), FrameDirection.DOWNSTREAM)
|
||||
}
|
||||
]
|
||||
)
|
||||
context.add_message(
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Finally, ask the user the reason for their doctor visit today. Once they answer, call the list_visit_reasons function.",
|
||||
}
|
||||
)
|
||||
await llm.process_frame(OpenAILLMContextFrame(context), FrameDirection.DOWNSTREAM)
|
||||
|
||||
async def start_visit_reasons(self, llm):
|
||||
async def start_visit_reasons(self, function_name, llm, context):
|
||||
print("!!! doing start visit reasons")
|
||||
# move to finish call
|
||||
self._context.set_tools([])
|
||||
self._context.add_message({"role": "system",
|
||||
"content": "Now, thank the user and end the conversation."})
|
||||
await llm.process_frame(OpenAILLMContextFrame(self._context), FrameDirection.DOWNSTREAM)
|
||||
context.set_tools([])
|
||||
context.add_message(
|
||||
{"role": "system", "content": "Now, thank the user and end the conversation."}
|
||||
)
|
||||
await llm.process_frame(OpenAILLMContextFrame(context), FrameDirection.DOWNSTREAM)
|
||||
|
||||
async def save_data(self, llm, args):
|
||||
async def save_data(self, function_name, tool_call_id, args, llm, context, result_callback):
|
||||
logger.info(f"!!! Saving data: {args}")
|
||||
# Since this is supposed to be "async", returning None from the callback
|
||||
# will prevent adding anything to context or re-prompting
|
||||
return None
|
||||
await result_callback(None)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=576,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
@@ -279,63 +300,53 @@ async def main(room_url: str, token):
|
||||
# tier="nova",
|
||||
# model="2-general"
|
||||
# )
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
#
|
||||
# English
|
||||
#
|
||||
voice_id="pNInz6obpgDQGcFmaJgB",
|
||||
|
||||
#
|
||||
# Spanish
|
||||
#
|
||||
# model="eleven_multilingual_v2",
|
||||
# voice_id="gD1IexrzCvsXPHUuT0s3",
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
# tts = CartesiaTTSService(
|
||||
# api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
# voice_id="846d6cb0-2301-48b6-9683-48f5618ea2f6", # Spanish-speaking Lady
|
||||
# )
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = []
|
||||
context = OpenAILLMContext(messages=messages)
|
||||
user_context = LLMUserContextAggregator(context)
|
||||
assistant_context = LLMAssistantContextAggregator(context)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
intake = IntakeProcessor(context, llm)
|
||||
intake = IntakeProcessor(context)
|
||||
llm.register_function("verify_birthday", intake.verify_birthday)
|
||||
llm.register_function(
|
||||
"list_prescriptions",
|
||||
intake.save_data,
|
||||
start_callback=intake.start_prescriptions)
|
||||
"list_prescriptions", intake.save_data, start_callback=intake.start_prescriptions
|
||||
)
|
||||
llm.register_function(
|
||||
"list_allergies",
|
||||
intake.save_data,
|
||||
start_callback=intake.start_allergies)
|
||||
"list_allergies", intake.save_data, start_callback=intake.start_allergies
|
||||
)
|
||||
llm.register_function(
|
||||
"list_conditions",
|
||||
intake.save_data,
|
||||
start_callback=intake.start_conditions)
|
||||
"list_conditions", intake.save_data, start_callback=intake.start_conditions
|
||||
)
|
||||
llm.register_function(
|
||||
"list_visit_reasons",
|
||||
intake.save_data,
|
||||
start_callback=intake.start_visit_reasons)
|
||||
"list_visit_reasons", intake.save_data, start_callback=intake.start_visit_reasons
|
||||
)
|
||||
|
||||
fl = FrameLogger("LLM Output")
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport input
|
||||
user_context, # User responses
|
||||
llm, # LLM
|
||||
fl, # Frame logger
|
||||
tts, # TTS
|
||||
transport.output(), # Transport output
|
||||
assistant_context, # Assistant responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport input
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
fl, # Frame logger
|
||||
tts, # TTS
|
||||
transport.output(), # Transport output
|
||||
context_aggregator.assistant(), # Assistant responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=False))
|
||||
|
||||
@@ -351,5 +362,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
python-dotenv
|
||||
requests
|
||||
fastapi[all]
|
||||
uvicorn
|
||||
pipecat-ai[daily,openai,silero]
|
||||
pipecat-ai[daily,cartesia,openai,silero]
|
||||
|
||||
@@ -1,18 +1,21 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
|
||||
|
||||
|
||||
def configure():
|
||||
async def configure(aiohttp_session: aiohttp.ClientSession):
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
parser.add_argument(
|
||||
"-u",
|
||||
"--url",
|
||||
type=str,
|
||||
required=False,
|
||||
help="URL of the Daily room to join")
|
||||
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-k",
|
||||
"--apikey",
|
||||
@@ -28,31 +31,24 @@ def configure():
|
||||
|
||||
if not url:
|
||||
raise Exception(
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL.")
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
|
||||
)
|
||||
|
||||
if not key:
|
||||
raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
raise Exception(
|
||||
"No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
|
||||
)
|
||||
|
||||
daily_rest_helper = DailyRESTHelper(
|
||||
daily_api_key=key,
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
|
||||
# Create a meeting token for the given room with an expiration 1 hour in
|
||||
# the future.
|
||||
room_name: str = urllib.parse.urlparse(url).path[1:]
|
||||
expiration: float = time.time() + 60 * 60
|
||||
expiry_time: float = 60 * 60
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://api.daily.co/v1/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True,
|
||||
"exp": expiration}},
|
||||
)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
token = await daily_rest_helper.get_token(url, expiry_time)
|
||||
|
||||
return (url, token)
|
||||
|
||||
@@ -1,31 +1,52 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
import atexit
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse, RedirectResponse
|
||||
|
||||
from utils.daily_helpers import create_room as _create_room, get_token
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
|
||||
|
||||
MAX_BOTS_PER_ROOM = 1
|
||||
|
||||
# Bot sub-process dict for status reporting and concurrency control
|
||||
bot_procs = {}
|
||||
|
||||
daily_helpers = {}
|
||||
|
||||
|
||||
def cleanup():
|
||||
# Clean up function, just to be extra safe
|
||||
for proc in bot_procs.values():
|
||||
for entry in bot_procs.values():
|
||||
proc = entry[0]
|
||||
proc.terminate()
|
||||
proc.wait()
|
||||
|
||||
|
||||
atexit.register(cleanup)
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
aiohttp_session = aiohttp.ClientSession()
|
||||
daily_helpers["rest"] = DailyRESTHelper(
|
||||
daily_api_key=os.getenv("DAILY_API_KEY", ""),
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
yield
|
||||
await aiohttp_session.close()
|
||||
cleanup()
|
||||
|
||||
|
||||
app = FastAPI()
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
@@ -39,45 +60,42 @@ app.add_middleware(
|
||||
@app.get("/start")
|
||||
async def start_agent(request: Request):
|
||||
print(f"!!! Creating room")
|
||||
room_url, room_name = _create_room()
|
||||
print(f"!!! Room URL: {room_url}")
|
||||
room = await daily_helpers["rest"].create_room(DailyRoomParams())
|
||||
print(f"!!! Room URL: {room.url}")
|
||||
# Ensure the room property is present
|
||||
if not room_url:
|
||||
if not room.url:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Missing 'room' property in request data. Cannot start agent without a target room!")
|
||||
detail="Missing 'room' property in request data. Cannot start agent without a target room!",
|
||||
)
|
||||
|
||||
# Check if there is already an existing process running in this room
|
||||
num_bots_in_room = sum(
|
||||
1 for proc in bot_procs.values() if proc[1] == room_url and proc[0].poll() is None)
|
||||
1 for proc in bot_procs.values() if proc[1] == room.url and proc[0].poll() is None
|
||||
)
|
||||
if num_bots_in_room >= MAX_BOTS_PER_ROOM:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Max bot limited reach for room: {room_url}")
|
||||
raise HTTPException(status_code=500, detail=f"Max bot limited reach for room: {room.url}")
|
||||
|
||||
# Get the token for the room
|
||||
token = get_token(room_url)
|
||||
token = await daily_helpers["rest"].get_token(room.url)
|
||||
|
||||
if not token:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to get token for room: {room_url}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")
|
||||
|
||||
# Spawn a new agent, and join the user session
|
||||
# Note: this is mostly for demonstration purposes (refer to 'deployment' in README)
|
||||
try:
|
||||
proc = subprocess.Popen(
|
||||
[
|
||||
f"python3 -m bot -u {room_url} -t {token}"
|
||||
],
|
||||
[f"python3 -m bot -u {room.url} -t {token}"],
|
||||
shell=True,
|
||||
bufsize=1,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__))
|
||||
cwd=os.path.dirname(os.path.abspath(__file__)),
|
||||
)
|
||||
bot_procs[proc.pid] = (proc, room_url)
|
||||
bot_procs[proc.pid] = (proc, room.url)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
|
||||
return RedirectResponse(room_url)
|
||||
return RedirectResponse(room.url)
|
||||
|
||||
|
||||
@app.get("/status/{pid}")
|
||||
@@ -87,8 +105,7 @@ def get_status(pid: int):
|
||||
|
||||
# If the subprocess doesn't exist, return an error
|
||||
if not proc:
|
||||
raise HTTPException(
|
||||
status_code=404, detail=f"Bot with process id: {pid} not found")
|
||||
raise HTTPException(status_code=404, detail=f"Bot with process id: {pid} not found")
|
||||
|
||||
# Check the status of the subprocess
|
||||
if proc[0].poll() is None:
|
||||
@@ -105,14 +122,10 @@ if __name__ == "__main__":
|
||||
default_host = os.getenv("HOST", "0.0.0.0")
|
||||
default_port = int(os.getenv("FAST_API_PORT", "7860"))
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Daily Storyteller FastAPI server")
|
||||
parser.add_argument("--host", type=str,
|
||||
default=default_host, help="Host address")
|
||||
parser.add_argument("--port", type=int,
|
||||
default=default_port, help="Port number")
|
||||
parser.add_argument("--reload", action="store_true",
|
||||
help="Reload code on change")
|
||||
parser = argparse.ArgumentParser(description="Daily Storyteller FastAPI server")
|
||||
parser.add_argument("--host", type=str, default=default_host, help="Host address")
|
||||
parser.add_argument("--port", type=int, default=default_port, help="Port number")
|
||||
parser.add_argument("--reload", action="store_true", help="Reload code on change")
|
||||
|
||||
config = parser.parse_args()
|
||||
print(f"to join a test room, visit http://localhost:{config.port}/start")
|
||||
|
||||
@@ -1,109 +0,0 @@
|
||||
|
||||
import urllib.parse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
|
||||
daily_api_path = os.getenv("DAILY_API_URL") or "api.daily.co/v1"
|
||||
daily_api_key = os.getenv("DAILY_API_KEY")
|
||||
|
||||
|
||||
def create_room() -> tuple[str, str]:
|
||||
"""
|
||||
Helper function to create a Daily room.
|
||||
# See: https://docs.daily.co/reference/rest-api/rooms
|
||||
|
||||
Returns:
|
||||
tuple: A tuple containing the room URL and room name.
|
||||
|
||||
Raises:
|
||||
Exception: If the request to create the room fails or if the response does not contain the room URL or room name.
|
||||
"""
|
||||
room_props = {
|
||||
"exp": time.time() + 60 * 60, # 1 hour
|
||||
"enable_chat": True,
|
||||
"enable_emoji_reactions": True,
|
||||
"eject_at_room_exp": True,
|
||||
"enable_prejoin_ui": False, # Important for the bot to be able to join headlessly
|
||||
}
|
||||
res = requests.post(
|
||||
f"https://{daily_api_path}/rooms",
|
||||
headers={"Authorization": f"Bearer {daily_api_key}"},
|
||||
json={
|
||||
"properties": room_props
|
||||
},
|
||||
)
|
||||
if res.status_code != 200:
|
||||
raise Exception(f"Unable to create room: {res.text}")
|
||||
|
||||
data = res.json()
|
||||
room_url: str = data.get("url")
|
||||
room_name: str = data.get("name")
|
||||
if room_url is None or room_name is None:
|
||||
raise Exception("Missing room URL or room name in response")
|
||||
|
||||
return room_url, room_name
|
||||
|
||||
|
||||
def get_name_from_url(room_url: str) -> str:
|
||||
"""
|
||||
Extracts the name from a given room URL.
|
||||
|
||||
Args:
|
||||
room_url (str): The URL of the room.
|
||||
|
||||
Returns:
|
||||
str: The extracted name from the room URL.
|
||||
"""
|
||||
return urllib.parse.urlparse(room_url).path[1:]
|
||||
|
||||
|
||||
def get_token(room_url: str) -> str:
|
||||
"""
|
||||
Retrieves a meeting token for the specified Daily room URL.
|
||||
# See: https://docs.daily.co/reference/rest-api/meeting-tokens
|
||||
|
||||
Args:
|
||||
room_url (str): The URL of the Daily room.
|
||||
|
||||
Returns:
|
||||
str: The meeting token.
|
||||
|
||||
Raises:
|
||||
Exception: If no room URL is specified or if no Daily API key is specified.
|
||||
Exception: If there is an error creating the meeting token.
|
||||
"""
|
||||
if not room_url:
|
||||
raise Exception(
|
||||
"No Daily room specified. You must specify a Daily room in order a token to be generated.")
|
||||
|
||||
if not daily_api_key:
|
||||
raise Exception(
|
||||
"No Daily API key specified. set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
|
||||
expiration: float = time.time() + 60 * 60
|
||||
room_name = get_name_from_url(room_url)
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://{daily_api_path}/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {daily_api_key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True, # Owner tokens required for transcription
|
||||
"exp": expiration}},
|
||||
)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
|
||||
return token
|
||||
@@ -1,3 +1,9 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
@@ -8,19 +14,22 @@ from PIL import Image
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import LLMAssistantResponseAggregator, LLMUserResponseAggregator
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.frames.frames import (
|
||||
AudioRawFrame,
|
||||
ImageRawFrame,
|
||||
OutputImageRawFrame,
|
||||
SpriteFrame,
|
||||
Frame,
|
||||
LLMMessagesFrame,
|
||||
TTSStoppedFrame
|
||||
TTSAudioRawFrame,
|
||||
TTSStoppedFrame,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTranscriptionSettings, DailyTransport
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
@@ -28,6 +37,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -43,7 +53,7 @@ for i in range(1, 26):
|
||||
# Get the filename without the extension to use as the dictionary key
|
||||
# Open the image and convert it to bytes
|
||||
with Image.open(full_path) as img:
|
||||
sprites.append(ImageRawFrame(image=img.tobytes(), size=img.size, format=img.format))
|
||||
sprites.append(OutputImageRawFrame(image=img.tobytes(), size=img.size, format=img.format))
|
||||
|
||||
flipped = sprites[::-1]
|
||||
sprites.extend(flipped)
|
||||
@@ -66,7 +76,7 @@ class TalkingAnimation(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, AudioRawFrame):
|
||||
if isinstance(frame, TTSAudioRawFrame):
|
||||
if not self._is_talking:
|
||||
await self.push_frame(talking_frame)
|
||||
self._is_talking = True
|
||||
@@ -77,8 +87,10 @@ class TalkingAnimation(FrameProcessor):
|
||||
await self.push_frame(frame)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -99,17 +111,15 @@ async def main(room_url: str, token):
|
||||
# tier="nova",
|
||||
# model="2-general"
|
||||
# )
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
#
|
||||
# English
|
||||
#
|
||||
voice_id="pNInz6obpgDQGcFmaJgB",
|
||||
|
||||
#
|
||||
# Spanish
|
||||
#
|
||||
@@ -117,9 +127,7 @@ async def main(room_url: str, token):
|
||||
# voice_id="gD1IexrzCvsXPHUuT0s3",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -128,7 +136,6 @@ async def main(room_url: str, token):
|
||||
# English
|
||||
#
|
||||
"content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by introducing yourself.",
|
||||
|
||||
#
|
||||
# Spanish
|
||||
#
|
||||
@@ -141,15 +148,17 @@ async def main(room_url: str, token):
|
||||
|
||||
ta = TalkingAnimation()
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(),
|
||||
user_response,
|
||||
llm,
|
||||
tts,
|
||||
ta,
|
||||
transport.output(),
|
||||
assistant_response,
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
user_response,
|
||||
llm,
|
||||
tts,
|
||||
ta,
|
||||
transport.output(),
|
||||
assistant_response,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
await task.queue_frame(quiet_frame)
|
||||
@@ -165,5 +174,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
python-dotenv
|
||||
requests
|
||||
fastapi[all]
|
||||
uvicorn
|
||||
pipecat-ai[daily,openai,silero]
|
||||
pipecat-ai[daily,elevenlabs,openai,silero]
|
||||
|
||||
@@ -1,18 +1,21 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
|
||||
|
||||
|
||||
def configure():
|
||||
async def configure(aiohttp_session: aiohttp.ClientSession):
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
parser.add_argument(
|
||||
"-u",
|
||||
"--url",
|
||||
type=str,
|
||||
required=False,
|
||||
help="URL of the Daily room to join")
|
||||
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-k",
|
||||
"--apikey",
|
||||
@@ -28,31 +31,24 @@ def configure():
|
||||
|
||||
if not url:
|
||||
raise Exception(
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL.")
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
|
||||
)
|
||||
|
||||
if not key:
|
||||
raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
raise Exception(
|
||||
"No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
|
||||
)
|
||||
|
||||
daily_rest_helper = DailyRESTHelper(
|
||||
daily_api_key=key,
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
|
||||
# Create a meeting token for the given room with an expiration 1 hour in
|
||||
# the future.
|
||||
room_name: str = urllib.parse.urlparse(url).path[1:]
|
||||
expiration: float = time.time() + 60 * 60
|
||||
expiry_time: float = 60 * 60
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://api.daily.co/v1/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True,
|
||||
"exp": expiration}},
|
||||
)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
token = await daily_rest_helper.get_token(url, expiry_time)
|
||||
|
||||
return (url, token)
|
||||
|
||||
@@ -1,31 +1,52 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
import atexit
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse, RedirectResponse
|
||||
|
||||
from utils.daily_helpers import create_room as _create_room, get_token
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
|
||||
|
||||
MAX_BOTS_PER_ROOM = 1
|
||||
|
||||
# Bot sub-process dict for status reporting and concurrency control
|
||||
bot_procs = {}
|
||||
|
||||
daily_helpers = {}
|
||||
|
||||
|
||||
def cleanup():
|
||||
# Clean up function, just to be extra safe
|
||||
for proc in bot_procs.values():
|
||||
for entry in bot_procs.values():
|
||||
proc = entry[0]
|
||||
proc.terminate()
|
||||
proc.wait()
|
||||
|
||||
|
||||
atexit.register(cleanup)
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
aiohttp_session = aiohttp.ClientSession()
|
||||
daily_helpers["rest"] = DailyRESTHelper(
|
||||
daily_api_key=os.getenv("DAILY_API_KEY", ""),
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
yield
|
||||
await aiohttp_session.close()
|
||||
cleanup()
|
||||
|
||||
|
||||
app = FastAPI()
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
@@ -39,45 +60,42 @@ app.add_middleware(
|
||||
@app.get("/start")
|
||||
async def start_agent(request: Request):
|
||||
print(f"!!! Creating room")
|
||||
room_url, room_name = _create_room()
|
||||
print(f"!!! Room URL: {room_url}")
|
||||
room = await daily_helpers["rest"].create_room(DailyRoomParams())
|
||||
print(f"!!! Room URL: {room.url}")
|
||||
# Ensure the room property is present
|
||||
if not room_url:
|
||||
if not room.url:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Missing 'room' property in request data. Cannot start agent without a target room!")
|
||||
detail="Missing 'room' property in request data. Cannot start agent without a target room!",
|
||||
)
|
||||
|
||||
# Check if there is already an existing process running in this room
|
||||
num_bots_in_room = sum(
|
||||
1 for proc in bot_procs.values() if proc[1] == room_url and proc[0].poll() is None)
|
||||
1 for proc in bot_procs.values() if proc[1] == room.url and proc[0].poll() is None
|
||||
)
|
||||
if num_bots_in_room >= MAX_BOTS_PER_ROOM:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Max bot limited reach for room: {room_url}")
|
||||
raise HTTPException(status_code=500, detail=f"Max bot limited reach for room: {room.url}")
|
||||
|
||||
# Get the token for the room
|
||||
token = get_token(room_url)
|
||||
token = await daily_helpers["rest"].get_token(room.url)
|
||||
|
||||
if not token:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to get token for room: {room_url}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")
|
||||
|
||||
# Spawn a new agent, and join the user session
|
||||
# Note: this is mostly for demonstration purposes (refer to 'deployment' in README)
|
||||
try:
|
||||
proc = subprocess.Popen(
|
||||
[
|
||||
f"python3 -m bot -u {room_url} -t {token}"
|
||||
],
|
||||
[f"python3 -m bot -u {room.url} -t {token}"],
|
||||
shell=True,
|
||||
bufsize=1,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__))
|
||||
cwd=os.path.dirname(os.path.abspath(__file__)),
|
||||
)
|
||||
bot_procs[proc.pid] = (proc, room_url)
|
||||
bot_procs[proc.pid] = (proc, room.url)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
|
||||
return RedirectResponse(room_url)
|
||||
return RedirectResponse(room.url)
|
||||
|
||||
|
||||
@app.get("/status/{pid}")
|
||||
@@ -87,8 +105,7 @@ def get_status(pid: int):
|
||||
|
||||
# If the subprocess doesn't exist, return an error
|
||||
if not proc:
|
||||
raise HTTPException(
|
||||
status_code=404, detail=f"Bot with process id: {pid} not found")
|
||||
raise HTTPException(status_code=404, detail=f"Bot with process id: {pid} not found")
|
||||
|
||||
# Check the status of the subprocess
|
||||
if proc[0].poll() is None:
|
||||
@@ -105,14 +122,10 @@ if __name__ == "__main__":
|
||||
default_host = os.getenv("HOST", "0.0.0.0")
|
||||
default_port = int(os.getenv("FAST_API_PORT", "7860"))
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Daily Storyteller FastAPI server")
|
||||
parser.add_argument("--host", type=str,
|
||||
default=default_host, help="Host address")
|
||||
parser.add_argument("--port", type=int,
|
||||
default=default_port, help="Port number")
|
||||
parser.add_argument("--reload", action="store_true",
|
||||
help="Reload code on change")
|
||||
parser = argparse.ArgumentParser(description="Daily Storyteller FastAPI server")
|
||||
parser.add_argument("--host", type=str, default=default_host, help="Host address")
|
||||
parser.add_argument("--port", type=int, default=default_port, help="Port number")
|
||||
parser.add_argument("--reload", action="store_true", help="Reload code on change")
|
||||
|
||||
config = parser.parse_args()
|
||||
|
||||
|
||||
@@ -1,109 +0,0 @@
|
||||
|
||||
import urllib.parse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
|
||||
daily_api_path = os.getenv("DAILY_API_URL") or "api.daily.co/v1"
|
||||
daily_api_key = os.getenv("DAILY_API_KEY")
|
||||
|
||||
|
||||
def create_room() -> tuple[str, str]:
|
||||
"""
|
||||
Helper function to create a Daily room.
|
||||
# See: https://docs.daily.co/reference/rest-api/rooms
|
||||
|
||||
Returns:
|
||||
tuple: A tuple containing the room URL and room name.
|
||||
|
||||
Raises:
|
||||
Exception: If the request to create the room fails or if the response does not contain the room URL or room name.
|
||||
"""
|
||||
room_props = {
|
||||
"exp": time.time() + 60 * 60, # 1 hour
|
||||
"enable_chat": True,
|
||||
"enable_emoji_reactions": True,
|
||||
"eject_at_room_exp": True,
|
||||
"enable_prejoin_ui": False, # Important for the bot to be able to join headlessly
|
||||
}
|
||||
res = requests.post(
|
||||
f"https://{daily_api_path}/rooms",
|
||||
headers={"Authorization": f"Bearer {daily_api_key}"},
|
||||
json={
|
||||
"properties": room_props
|
||||
},
|
||||
)
|
||||
if res.status_code != 200:
|
||||
raise Exception(f"Unable to create room: {res.text}")
|
||||
|
||||
data = res.json()
|
||||
room_url: str = data.get("url")
|
||||
room_name: str = data.get("name")
|
||||
if room_url is None or room_name is None:
|
||||
raise Exception("Missing room URL or room name in response")
|
||||
|
||||
return room_url, room_name
|
||||
|
||||
|
||||
def get_name_from_url(room_url: str) -> str:
|
||||
"""
|
||||
Extracts the name from a given room URL.
|
||||
|
||||
Args:
|
||||
room_url (str): The URL of the room.
|
||||
|
||||
Returns:
|
||||
str: The extracted name from the room URL.
|
||||
"""
|
||||
return urllib.parse.urlparse(room_url).path[1:]
|
||||
|
||||
|
||||
def get_token(room_url: str) -> str:
|
||||
"""
|
||||
Retrieves a meeting token for the specified Daily room URL.
|
||||
# See: https://docs.daily.co/reference/rest-api/meeting-tokens
|
||||
|
||||
Args:
|
||||
room_url (str): The URL of the Daily room.
|
||||
|
||||
Returns:
|
||||
str: The meeting token.
|
||||
|
||||
Raises:
|
||||
Exception: If no room URL is specified or if no Daily API key is specified.
|
||||
Exception: If there is an error creating the meeting token.
|
||||
"""
|
||||
if not room_url:
|
||||
raise Exception(
|
||||
"No Daily room specified. You must specify a Daily room in order a token to be generated.")
|
||||
|
||||
if not daily_api_key:
|
||||
raise Exception(
|
||||
"No Daily API key specified. set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
|
||||
expiration: float = time.time() + 60 * 60
|
||||
room_name = get_name_from_url(room_url)
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://{daily_api_path}/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {daily_api_key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True, # Owner tokens required for transcription
|
||||
"exp": expiration}},
|
||||
)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
|
||||
return token
|
||||
@@ -1,4 +1,4 @@
|
||||
FROM python:3.11-bullseye
|
||||
FROM python:3.11-slim-bookworm
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
ARG USE_PERSISTENT_DATA
|
||||
@@ -51,4 +51,4 @@ COPY --chown=user ./frontend/ frontend/
|
||||
RUN cd frontend && npm install && npm run build
|
||||
|
||||
# Start the FastAPI server
|
||||
CMD python3 src/server.py --port ${FAST_API_PORT}
|
||||
CMD python3 src/bot_runner.py --port ${FAST_API_PORT}
|
||||
@@ -48,6 +48,8 @@ pip install -r requirements.txt
|
||||
mv env.example .env
|
||||
```
|
||||
|
||||
When deploying to production, to ensure only this app can spawn a new bot, set your `ENV` to `production`
|
||||
|
||||
**Build the frontend:**
|
||||
|
||||
This project uses a custom frontend, which needs to built. Note: this is done automatically as part of the Docker deployment.
|
||||
@@ -64,11 +66,11 @@ The build UI files can be found in `frontend/out`
|
||||
|
||||
Start the API / bot manager:
|
||||
|
||||
`python src/server.py`
|
||||
`python src/bot_runner.py`
|
||||
|
||||
If you'd like to run a custom domain or port:
|
||||
|
||||
`python src/server.py --host somehost --p 7777`
|
||||
`python src/bot_runner.py --host somehost --p someport`
|
||||
|
||||
➡️ Open the host URL in your browser `http://localhost:7860`
|
||||
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
DAILY_API_KEY=7df...
|
||||
ELEVENLABS_API_KEY=aeb...
|
||||
ELEVENLABS_VOICE_ID=7S...
|
||||
FAL_KEY=8c...
|
||||
OPENAI_API_KEY=sk-PL...
|
||||
DAILY_API_KEY=
|
||||
DAILY_SAMPLE_ROOM_URL=
|
||||
ELEVENLABS_API_KEY=
|
||||
ELEVENLABS_VOICE_ID=
|
||||
FAL_KEY=
|
||||
OPENAI_API_KEY=
|
||||
|
||||
ENV= # dev | production
|
||||
RUN_AS_VM= # Set this if you want to run bots on process (not launch a new VM)
|
||||
@@ -27,14 +27,11 @@ export default function Call() {
|
||||
|
||||
// Create a new room for the story session
|
||||
try {
|
||||
const response = await fetch("/create", {
|
||||
const response = await fetch("/start_bot", {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
room_url: process.env.NEXT_PUBLIC_ROOM_URL || null,
|
||||
}),
|
||||
});
|
||||
|
||||
const { room_url, token } = await response.json();
|
||||
@@ -55,21 +52,9 @@ export default function Call() {
|
||||
// Disable local audio, the bot will say hello first
|
||||
daily.setLocalAudio(false);
|
||||
|
||||
// Start the bot
|
||||
const resp = await fetch("/start", {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
room_url,
|
||||
}),
|
||||
});
|
||||
|
||||
setState("started");
|
||||
} catch (error) {
|
||||
setState("error");
|
||||
leave();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,7 +64,13 @@ export default function Call() {
|
||||
}
|
||||
|
||||
if (state === "error") {
|
||||
return <div>An Error occured</div>;
|
||||
return (
|
||||
<div className="flex items-center mx-auto">
|
||||
<p className="text-red-500 font-semibold bg-white px-4 py-2 shadow-xl rounded-lg">
|
||||
This demo is currently at capacity. Please try again later.
|
||||
</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
if (state === "started") {
|
||||
|
||||
@@ -108,26 +108,26 @@ export default function DevicePicker({}: Props) {
|
||||
{hasMicError && (
|
||||
<div className="error">
|
||||
{micState === "blocked" ? (
|
||||
<p>
|
||||
<p className="text-red-500">
|
||||
Please check your browser and system permissions. Make sure that
|
||||
this app is allowed to access your microphone.
|
||||
</p>
|
||||
) : micState === "in-use" ? (
|
||||
<p>
|
||||
<p className="text-red-500">
|
||||
Your microphone is being used by another app. Please close any
|
||||
other apps using your microphone and restart this app.
|
||||
</p>
|
||||
) : micState === "not-found" ? (
|
||||
<p>
|
||||
<p className="text-red-500">
|
||||
No microphone seems to be connected. Please connect a microphone.
|
||||
</p>
|
||||
) : micState === "not-supported" ? (
|
||||
<p>
|
||||
<p className="text-red-500">
|
||||
This app is not supported on your device. Please update your
|
||||
software or use a different device.
|
||||
</p>
|
||||
) : (
|
||||
<p>
|
||||
<p className="text-red-500">
|
||||
There seems to be an issue accessing your microphone. Try
|
||||
restarting the app or consult a system administrator.
|
||||
</p>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import React from "react";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import DevicePicker from "@/components/DevicePicker";
|
||||
import { IconEar, IconLoader2 } from "@tabler/icons-react";
|
||||
import { IconAlertCircle, IconEar, IconLoader2 } from "@tabler/icons-react";
|
||||
|
||||
type SetupProps = {
|
||||
handleStart: () => void;
|
||||
@@ -24,7 +24,6 @@ export const Setup: React.FC<SetupProps> = ({ handleStart }) => {
|
||||
<h1 className="text-4xl font-bold text-pretty tracking-tighter mb-4">
|
||||
Welcome to <span className="text-sky-500">Storytime</span>
|
||||
</h1>
|
||||
|
||||
{state === "intro" ? (
|
||||
<>
|
||||
<p className="text-gray-600 leading-relaxed text-pretty">
|
||||
@@ -38,6 +37,9 @@ export const Setup: React.FC<SetupProps> = ({ handleStart }) => {
|
||||
<IconEar size={24} /> For best results, try in a quiet
|
||||
environment!
|
||||
</p>
|
||||
<p className="flex flex-row gap-2 text-gray-600 font-medium text-red-500">
|
||||
<IconAlertCircle size={24} /> This demo expires after 5 minutes.
|
||||
</p>
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
@@ -49,7 +51,6 @@ export const Setup: React.FC<SetupProps> = ({ handleStart }) => {
|
||||
<DevicePicker />
|
||||
</>
|
||||
)}
|
||||
|
||||
<hr className="border-gray-150 my-2" />
|
||||
|
||||
<Button
|
||||
|
||||
@@ -1,2 +1 @@
|
||||
NEXT_PUBLIC_ROOM_URL=
|
||||
SITE_URL=
|
||||
6240
examples/storytelling-chatbot/frontend/package-lock.json
generated
Normal file
6240
examples/storytelling-chatbot/frontend/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,5 @@
|
||||
async_timeout
|
||||
fastapi
|
||||
uvicorn
|
||||
requests
|
||||
python-dotenv
|
||||
pipecat-ai[daily,openai,fal]
|
||||
pipecat-ai[daily,elevenlabs,openai,fal]
|
||||
|
||||
@@ -5,15 +5,22 @@ import os
|
||||
import sys
|
||||
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame, StopTaskFrame
|
||||
from pipecat.frames.frames import LLMMessagesFrame, StopTaskFrame, EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import LLMAssistantResponseAggregator, LLMUserResponseAggregator
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport, DailyTransportMessageFrame
|
||||
from pipecat.transports.services.daily import (
|
||||
DailyParams,
|
||||
DailyTransport,
|
||||
DailyTransportMessageFrame,
|
||||
)
|
||||
|
||||
from processors import StoryProcessor, StoryImageProcessor
|
||||
from prompts import LLM_BASE_PROMPT, LLM_INTRO_PROMPT, CUE_USER_TURN
|
||||
@@ -22,6 +29,7 @@ from utils.helpers import load_sounds, load_images
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -33,7 +41,6 @@ images = load_images(["book1.png", "book2.png"])
|
||||
|
||||
async def main(room_url, token=None):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
|
||||
# -------------- Transport --------------- #
|
||||
|
||||
transport = DailyTransport(
|
||||
@@ -47,29 +54,22 @@ async def main(room_url, token=None):
|
||||
camera_out_height=768,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
logger.debug("Transport created for room:" + room_url)
|
||||
|
||||
# -------------- Services --------------- #
|
||||
|
||||
llm_service = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o"
|
||||
)
|
||||
llm_service = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
tts_service = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
)
|
||||
|
||||
fal_service_params = FalImageGenService.InputParams(
|
||||
image_size={
|
||||
"width": 768,
|
||||
"height": 768
|
||||
}
|
||||
image_size={"width": 768, "height": 768}
|
||||
)
|
||||
|
||||
fal_service = FalImageGenService(
|
||||
@@ -111,12 +111,12 @@ async def main(room_url, token=None):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await intro_task.queue_frames(
|
||||
[
|
||||
images['book1'],
|
||||
images["book1"],
|
||||
LLMMessagesFrame([LLM_INTRO_PROMPT]),
|
||||
DailyTransportMessageFrame(CUE_USER_TURN),
|
||||
sounds["listening"],
|
||||
images['book2'],
|
||||
StopTaskFrame()
|
||||
images["book2"],
|
||||
StopTaskFrame(),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -126,21 +126,34 @@ async def main(room_url, token=None):
|
||||
|
||||
# The main story pipeline is used to continue the story based on user
|
||||
# input.
|
||||
main_pipeline = Pipeline([
|
||||
transport.input(),
|
||||
user_responses,
|
||||
llm_service,
|
||||
story_processor,
|
||||
image_processor,
|
||||
tts_service,
|
||||
transport.output(),
|
||||
llm_responses
|
||||
])
|
||||
main_pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
user_responses,
|
||||
llm_service,
|
||||
story_processor,
|
||||
image_processor,
|
||||
tts_service,
|
||||
transport.output(),
|
||||
llm_responses,
|
||||
]
|
||||
)
|
||||
|
||||
main_task = PipelineTask(main_pipeline)
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
intro_task.queue_frame(EndFrame())
|
||||
await main_task.queue_frame(EndFrame())
|
||||
|
||||
@transport.event_handler("on_call_state_updated")
|
||||
async def on_call_state_updated(transport, state):
|
||||
if state == "left":
|
||||
await main_task.queue_frame(EndFrame())
|
||||
|
||||
await runner.run(main_task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Daily Storyteller Bot")
|
||||
parser.add_argument("-u", type=str, help="Room URL")
|
||||
|
||||
245
examples/storytelling-chatbot/src/bot_runner.py
Normal file
245
examples/storytelling-chatbot/src/bot_runner.py
Normal file
@@ -0,0 +1,245 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import subprocess
|
||||
import os
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.responses import FileResponse, JSONResponse
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import (
|
||||
DailyRESTHelper,
|
||||
DailyRoomObject,
|
||||
DailyRoomProperties,
|
||||
DailyRoomParams,
|
||||
)
|
||||
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# ------------ Fast API Config ------------ #
|
||||
|
||||
MAX_SESSION_TIME = 5 * 60 # 5 minutes
|
||||
|
||||
daily_helpers = {}
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
aiohttp_session = aiohttp.ClientSession()
|
||||
daily_helpers["rest"] = DailyRESTHelper(
|
||||
daily_api_key=os.getenv("DAILY_API_KEY", ""),
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
yield
|
||||
await aiohttp_session.close()
|
||||
|
||||
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Mount the static directory
|
||||
STATIC_DIR = "frontend/out"
|
||||
|
||||
|
||||
# ------------ Fast API Routes ------------ #
|
||||
|
||||
app.mount("/static", StaticFiles(directory=STATIC_DIR, html=True), name="static")
|
||||
|
||||
|
||||
@app.post("/start_bot")
|
||||
async def start_bot(request: Request) -> JSONResponse:
|
||||
if os.getenv("ENV", "dev") == "production":
|
||||
# Only allow requests from the specified domain
|
||||
host_header = request.headers.get("host")
|
||||
allowed_domains = ["storytelling-chatbot.fly.dev", "www.storytelling-chatbot.fly.dev"]
|
||||
# Check if the Host header matches the allowed domain
|
||||
if host_header not in allowed_domains:
|
||||
raise HTTPException(status_code=403, detail="Access denied")
|
||||
|
||||
try:
|
||||
data = await request.json()
|
||||
# Is this a webhook creation request?
|
||||
if "test" in data:
|
||||
return JSONResponse({"test": True})
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
# Use specified room URL, or create a new one if not specified
|
||||
room_url = os.getenv("DAILY_SAMPLE_ROOM_URL", "")
|
||||
|
||||
if not room_url:
|
||||
params = DailyRoomParams(properties=DailyRoomProperties())
|
||||
try:
|
||||
room: DailyRoomObject = await daily_helpers["rest"].create_room(params=params)
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Unable to provision room {e}")
|
||||
else:
|
||||
# Check passed room URL exists, we should assume that it already has a sip set up
|
||||
try:
|
||||
room: DailyRoomObject = await daily_helpers["rest"].get_room_from_url(room_url)
|
||||
except Exception:
|
||||
raise HTTPException(status_code=500, detail=f"Room not found: {room_url}")
|
||||
|
||||
# Give the agent a token to join the session
|
||||
token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)
|
||||
|
||||
if not room or not token:
|
||||
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room_url}")
|
||||
|
||||
# Launch a new VM, or run as a shell process (not recommended)
|
||||
if os.getenv("RUN_AS_VM", False):
|
||||
try:
|
||||
await virtualize_bot(room.url, token)
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Failed to spawn VM: {e}")
|
||||
else:
|
||||
try:
|
||||
subprocess.Popen(
|
||||
[f"python3 -m bot -u {room.url} -t {token}"],
|
||||
shell=True,
|
||||
bufsize=1,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__)),
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
|
||||
# Grab a token for the user to join with
|
||||
user_token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)
|
||||
|
||||
return JSONResponse(
|
||||
{
|
||||
"room_url": room.url,
|
||||
"token": user_token,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@app.get("/{path_name:path}", response_class=FileResponse)
|
||||
async def catch_all(path_name: Optional[str] = ""):
|
||||
if path_name == "":
|
||||
return FileResponse(f"{STATIC_DIR}/index.html")
|
||||
|
||||
file_path = Path(STATIC_DIR) / (path_name or "")
|
||||
|
||||
if file_path.is_file():
|
||||
return file_path
|
||||
|
||||
html_file_path = file_path.with_suffix(".html")
|
||||
if html_file_path.is_file():
|
||||
return FileResponse(html_file_path)
|
||||
|
||||
raise HTTPException(status_code=450, detail="Incorrect API call")
|
||||
|
||||
|
||||
# ------------ Virtualization ------------ #
|
||||
|
||||
|
||||
async def virtualize_bot(room_url: str, token: str):
|
||||
"""
|
||||
This is an example of how to virtualize the bot using Fly.io
|
||||
You can adapt this method to use whichever cloud provider you prefer.
|
||||
"""
|
||||
FLY_API_HOST = os.getenv("FLY_API_HOST", "https://api.machines.dev/v1")
|
||||
FLY_APP_NAME = os.getenv("FLY_APP_NAME", "storytelling-chatbot")
|
||||
FLY_API_KEY = os.getenv("FLY_API_KEY", "")
|
||||
FLY_HEADERS = {"Authorization": f"Bearer {FLY_API_KEY}", "Content-Type": "application/json"}
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
# Use the same image as the bot runner
|
||||
async with session.get(
|
||||
f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines", headers=FLY_HEADERS
|
||||
) as r:
|
||||
if r.status != 200:
|
||||
text = await r.text()
|
||||
raise Exception(f"Unable to get machine info from Fly: {text}")
|
||||
|
||||
data = await r.json()
|
||||
image = data[0]["config"]["image"]
|
||||
|
||||
# Machine configuration
|
||||
cmd = f"python3 src/bot.py -u {room_url} -t {token}"
|
||||
cmd = cmd.split()
|
||||
worker_props = {
|
||||
"config": {
|
||||
"image": image,
|
||||
"auto_destroy": True,
|
||||
"init": {"cmd": cmd},
|
||||
"restart": {"policy": "no"},
|
||||
"guest": {"cpu_kind": "shared", "cpus": 1, "memory_mb": 512},
|
||||
},
|
||||
}
|
||||
|
||||
# Spawn a new machine instance
|
||||
async with session.post(
|
||||
f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines", headers=FLY_HEADERS, json=worker_props
|
||||
) as r:
|
||||
if r.status != 200:
|
||||
text = await r.text()
|
||||
raise Exception(f"Problem starting a bot worker: {text}")
|
||||
|
||||
data = await r.json()
|
||||
# Wait for the machine to enter the started state
|
||||
vm_id = data["id"]
|
||||
|
||||
async with session.get(
|
||||
f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines/{vm_id}/wait?state=started",
|
||||
headers=FLY_HEADERS,
|
||||
) as r:
|
||||
if r.status != 200:
|
||||
text = await r.text()
|
||||
raise Exception(f"Bot was unable to enter started state: {text}")
|
||||
|
||||
print(f"Machine joined room: {room_url}")
|
||||
|
||||
|
||||
# ------------ Main ------------ #
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Check environment variables
|
||||
required_env_vars = [
|
||||
"OPENAI_API_KEY",
|
||||
"DAILY_API_KEY",
|
||||
"FAL_KEY",
|
||||
"ELEVENLABS_VOICE_ID",
|
||||
"ELEVENLABS_API_KEY",
|
||||
]
|
||||
for env_var in required_env_vars:
|
||||
if env_var not in os.environ:
|
||||
raise Exception(f"Missing environment variable: {env_var}.")
|
||||
|
||||
import uvicorn
|
||||
|
||||
default_host = os.getenv("HOST", "0.0.0.0")
|
||||
default_port = int(os.getenv("FAST_API_PORT", "7860"))
|
||||
|
||||
parser = argparse.ArgumentParser(description="Daily Storyteller FastAPI server")
|
||||
parser.add_argument("--host", type=str, default=default_host, help="Host address")
|
||||
parser.add_argument("--port", type=int, default=default_port, help="Port number")
|
||||
parser.add_argument("--reload", action="store_true", help="Reload code on change")
|
||||
|
||||
config = parser.parse_args()
|
||||
|
||||
uvicorn.run("bot_runner:app", host=config.host, port=config.port, reload=config.reload)
|
||||
@@ -6,7 +6,8 @@ from pipecat.frames.frames import (
|
||||
Frame,
|
||||
LLMFullResponseEndFrame,
|
||||
TextFrame,
|
||||
UserStoppedSpeakingFrame)
|
||||
UserStoppedSpeakingFrame,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.transports.services.daily import DailyTransportMessageFrame
|
||||
|
||||
@@ -35,6 +36,7 @@ class StoryPromptFrame(TextFrame):
|
||||
|
||||
# ------------ Frame Processors ----------- #
|
||||
|
||||
|
||||
class StoryImageProcessor(FrameProcessor):
|
||||
"""
|
||||
Processor for image prompt frames that will be sent to the FAL service.
|
||||
@@ -113,7 +115,7 @@ class StoryProcessor(FrameProcessor):
|
||||
# Extract the image prompt from the text using regex
|
||||
image_prompt = re.search(r"<(.*?)>", self._text).group(1)
|
||||
# Remove the image prompt from the text
|
||||
self._text = re.sub(r"<.*?>", '', self._text, count=1)
|
||||
self._text = re.sub(r"<.*?>", "", self._text, count=1)
|
||||
# Process the image prompt frame
|
||||
await self.push_frame(StoryImageFrame(image_prompt))
|
||||
|
||||
@@ -124,8 +126,7 @@ class StoryProcessor(FrameProcessor):
|
||||
if re.search(r".*\[[bB]reak\].*", self._text):
|
||||
# Remove the [break] token from the text
|
||||
# so it isn't spoken out loud by the TTS
|
||||
self._text = re.sub(r'\[[bB]reak\]', '',
|
||||
self._text, flags=re.IGNORECASE)
|
||||
self._text = re.sub(r"\[[bB]reak\]", "", self._text, flags=re.IGNORECASE)
|
||||
self._text = self._text.replace("\n", " ")
|
||||
if len(self._text) > 2:
|
||||
# Append the sentence to the story
|
||||
|
||||
@@ -3,7 +3,7 @@ LLM_INTRO_PROMPT = {
|
||||
"content": "You are a creative storyteller who loves to tell whimsical, fantastical stories. \
|
||||
Your goal is to craft an engaging and fun story. \
|
||||
Start by asking the user what kind of story they'd like to hear. Don't provide any examples. \
|
||||
Keep your response to only a few sentences."
|
||||
Keep your response to only a few sentences.",
|
||||
}
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ LLM_BASE_PROMPT = {
|
||||
Responses should use the format: <...> story sentence [break] <...> story sentence [break] ... \
|
||||
After each response, ask me how I'd like the story to continue and wait for my input. \
|
||||
Please ensure your responses are less than 3-4 sentences long. \
|
||||
Please refrain from using any explicit language or content. Do not tell scary stories."
|
||||
Please refrain from using any explicit language or content. Do not tell scary stories.",
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,175 +0,0 @@
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
import atexit
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.responses import FileResponse, JSONResponse
|
||||
|
||||
from utils.daily_helpers import create_room as _create_room, get_token, get_name_from_url
|
||||
|
||||
MAX_BOTS_PER_ROOM = 1
|
||||
|
||||
# Bot sub-process dict for status reporting and concurrency control
|
||||
bot_procs = {}
|
||||
|
||||
|
||||
def cleanup():
|
||||
# Clean up function, just to be extra safe
|
||||
for proc in bot_procs.values():
|
||||
proc.terminate()
|
||||
proc.wait()
|
||||
|
||||
|
||||
atexit.register(cleanup)
|
||||
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Mount the static directory
|
||||
STATIC_DIR = "frontend/out"
|
||||
|
||||
app.mount("/static", StaticFiles(directory=STATIC_DIR, html=True), name="static")
|
||||
|
||||
|
||||
@app.post("/create")
|
||||
async def create_room(request: Request) -> JSONResponse:
|
||||
data = await request.json()
|
||||
|
||||
if data.get('room_url') is not None:
|
||||
room_url = data.get('room_url')
|
||||
room_name = get_name_from_url(room_url)
|
||||
else:
|
||||
room_url, room_name = _create_room()
|
||||
|
||||
token = get_token(room_url)
|
||||
|
||||
return JSONResponse({"room_url": room_url, "room_name": room_name, "token": token})
|
||||
|
||||
|
||||
@app.post("/start")
|
||||
async def start_agent(request: Request) -> JSONResponse:
|
||||
data = await request.json()
|
||||
|
||||
# Is this a webhook creation request?
|
||||
if "test" in data:
|
||||
return JSONResponse({"test": True})
|
||||
|
||||
# Ensure the room property is present
|
||||
room_url = data.get('room_url')
|
||||
if not room_url:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Missing 'room' property in request data. Cannot start agent without a target room!")
|
||||
|
||||
# Check if there is already an existing process running in this room
|
||||
num_bots_in_room = sum(
|
||||
1 for proc in bot_procs.values() if proc[1] == room_url and proc[0].poll() is None)
|
||||
if num_bots_in_room >= MAX_BOTS_PER_ROOM:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Max bot limited reach for room: {room_url}")
|
||||
|
||||
# Get the token for the room
|
||||
token = get_token(room_url)
|
||||
|
||||
if not token:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to get token for room: {room_url}")
|
||||
|
||||
# Spawn a new agent, and join the user session
|
||||
# Note: this is mostly for demonstration purposes (refer to 'deployment' in README)
|
||||
try:
|
||||
proc = subprocess.Popen(
|
||||
[
|
||||
f"python3 -m bot -u {room_url} -t {token}"
|
||||
],
|
||||
shell=True,
|
||||
bufsize=1,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__))
|
||||
)
|
||||
bot_procs[proc.pid] = (proc, room_url)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
|
||||
return JSONResponse({"bot_id": proc.pid, "room_url": room_url})
|
||||
|
||||
|
||||
@app.get("/status/{pid}")
|
||||
def get_status(pid: int):
|
||||
# Look up the subprocess
|
||||
proc = bot_procs.get(pid)
|
||||
|
||||
# If the subprocess doesn't exist, return an error
|
||||
if not proc:
|
||||
raise HTTPException(
|
||||
status_code=404, detail=f"Bot with process id: {pid} not found")
|
||||
|
||||
# Check the status of the subprocess
|
||||
if proc[0].poll() is None:
|
||||
status = "running"
|
||||
else:
|
||||
status = "finished"
|
||||
|
||||
return JSONResponse({"bot_id": pid, "status": status})
|
||||
|
||||
|
||||
@app.get("/{path_name:path}", response_class=FileResponse)
|
||||
async def catch_all(path_name: Optional[str] = ""):
|
||||
if path_name == "":
|
||||
return FileResponse(f"{STATIC_DIR}/index.html")
|
||||
|
||||
file_path = Path(STATIC_DIR) / (path_name or "")
|
||||
|
||||
if file_path.is_file():
|
||||
return file_path
|
||||
|
||||
html_file_path = file_path.with_suffix(".html")
|
||||
if html_file_path.is_file():
|
||||
return FileResponse(html_file_path)
|
||||
|
||||
raise HTTPException(status_code=450, detail="Incorrect API call")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Check environment variables
|
||||
required_env_vars = ['OPENAI_API_KEY', 'DAILY_API_KEY',
|
||||
'FAL_KEY', 'ELEVENLABS_VOICE_ID', 'ELEVENLABS_API_KEY']
|
||||
for env_var in required_env_vars:
|
||||
if env_var not in os.environ:
|
||||
raise Exception(f"Missing environment variable: {env_var}.")
|
||||
|
||||
import uvicorn
|
||||
|
||||
default_host = os.getenv("HOST", "0.0.0.0")
|
||||
default_port = int(os.getenv("FAST_API_PORT", "7860"))
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Daily Storyteller FastAPI server")
|
||||
parser.add_argument("--host", type=str,
|
||||
default=default_host, help="Host address")
|
||||
parser.add_argument("--port", type=int,
|
||||
default=default_port, help="Port number")
|
||||
parser.add_argument("--reload", action="store_true",
|
||||
help="Reload code on change")
|
||||
|
||||
config = parser.parse_args()
|
||||
|
||||
uvicorn.run(
|
||||
"server:app",
|
||||
host=config.host,
|
||||
port=config.port,
|
||||
reload=config.reload
|
||||
)
|
||||
@@ -1,109 +0,0 @@
|
||||
|
||||
import urllib.parse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
|
||||
daily_api_path = os.getenv("DAILY_API_URL") or "api.daily.co/v1"
|
||||
daily_api_key = os.getenv("DAILY_API_KEY")
|
||||
|
||||
|
||||
def create_room() -> tuple[str, str]:
|
||||
"""
|
||||
Helper function to create a Daily room.
|
||||
# See: https://docs.daily.co/reference/rest-api/rooms
|
||||
|
||||
Returns:
|
||||
tuple: A tuple containing the room URL and room name.
|
||||
|
||||
Raises:
|
||||
Exception: If the request to create the room fails or if the response does not contain the room URL or room name.
|
||||
"""
|
||||
room_props = {
|
||||
"exp": time.time() + 60 * 60, # 1 hour
|
||||
"enable_chat": True,
|
||||
"enable_emoji_reactions": True,
|
||||
"eject_at_room_exp": True,
|
||||
"enable_prejoin_ui": False, # Important for the bot to be able to join headlessly
|
||||
}
|
||||
res = requests.post(
|
||||
f"https://{daily_api_path}/rooms",
|
||||
headers={"Authorization": f"Bearer {daily_api_key}"},
|
||||
json={
|
||||
"properties": room_props
|
||||
},
|
||||
)
|
||||
if res.status_code != 200:
|
||||
raise Exception(f"Unable to create room: {res.text}")
|
||||
|
||||
data = res.json()
|
||||
room_url: str = data.get("url")
|
||||
room_name: str = data.get("name")
|
||||
if room_url is None or room_name is None:
|
||||
raise Exception("Missing room URL or room name in response")
|
||||
|
||||
return room_url, room_name
|
||||
|
||||
|
||||
def get_name_from_url(room_url: str) -> str:
|
||||
"""
|
||||
Extracts the name from a given room URL.
|
||||
|
||||
Args:
|
||||
room_url (str): The URL of the room.
|
||||
|
||||
Returns:
|
||||
str: The extracted name from the room URL.
|
||||
"""
|
||||
return urllib.parse.urlparse(room_url).path[1:]
|
||||
|
||||
|
||||
def get_token(room_url: str) -> str:
|
||||
"""
|
||||
Retrieves a meeting token for the specified Daily room URL.
|
||||
# See: https://docs.daily.co/reference/rest-api/meeting-tokens
|
||||
|
||||
Args:
|
||||
room_url (str): The URL of the Daily room.
|
||||
|
||||
Returns:
|
||||
str: The meeting token.
|
||||
|
||||
Raises:
|
||||
Exception: If no room URL is specified or if no Daily API key is specified.
|
||||
Exception: If there is an error creating the meeting token.
|
||||
"""
|
||||
if not room_url:
|
||||
raise Exception(
|
||||
"No Daily room specified. You must specify a Daily room in order a token to be generated.")
|
||||
|
||||
if not daily_api_key:
|
||||
raise Exception(
|
||||
"No Daily API key specified. set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
|
||||
expiration: float = time.time() + 60 * 60
|
||||
room_name = get_name_from_url(room_url)
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://{daily_api_path}/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {daily_api_key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True, # Owner tokens required for transcription
|
||||
"exp": expiration}},
|
||||
)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
|
||||
return token
|
||||
@@ -2,7 +2,7 @@ import os
|
||||
import wave
|
||||
from PIL import Image
|
||||
|
||||
from pipecat.frames.frames import AudioRawFrame, ImageRawFrame
|
||||
from pipecat.frames.frames import OutputAudioRawFrame, OutputImageRawFrame
|
||||
|
||||
script_dir = os.path.dirname(__file__)
|
||||
|
||||
@@ -16,7 +16,9 @@ def load_images(image_files):
|
||||
filename = os.path.splitext(os.path.basename(full_path))[0]
|
||||
# Open the image and convert it to bytes
|
||||
with Image.open(full_path) as img:
|
||||
images[filename] = ImageRawFrame(image=img.tobytes(), size=img.size, format=img.format)
|
||||
images[filename] = OutputImageRawFrame(
|
||||
image=img.tobytes(), size=img.size, format=img.format
|
||||
)
|
||||
return images
|
||||
|
||||
|
||||
@@ -30,8 +32,10 @@ def load_sounds(sound_files):
|
||||
filename = os.path.splitext(os.path.basename(full_path))[0]
|
||||
# Open the sound and convert it to bytes
|
||||
with wave.open(full_path) as audio_file:
|
||||
sounds[filename] = AudioRawFrame(audio=audio_file.readframes(-1),
|
||||
sample_rate=audio_file.getframerate(),
|
||||
num_channels=audio_file.getnchannels())
|
||||
sounds[filename] = OutputAudioRawFrame(
|
||||
audio=audio_file.readframes(-1),
|
||||
sample_rate=audio_file.getframerate(),
|
||||
num_channels=audio_file.getnchannels(),
|
||||
)
|
||||
|
||||
return sounds
|
||||
|
||||
13
examples/studypal/README.md
Normal file
13
examples/studypal/README.md
Normal file
@@ -0,0 +1,13 @@
|
||||
# studypal
|
||||
|
||||
### Have a conversation about any article on the web
|
||||
|
||||
studypal is a fast conversational AI built using [Daily](https://www.daily.co/) for real-time media transport and [Cartesia](https://cartesia.ai) for text-to-speech. Everything is orchestrated together (VAD -> STT -> LLM -> TTS) using [Pipecat](https://www.pipecat.ai/).
|
||||
|
||||
## Setup
|
||||
|
||||
1. Clone the repository
|
||||
2. Copy `env.example` to a `.env` file and add API keys
|
||||
3. Install the required packages: `pip install -r requirements.txt`
|
||||
4. Run `python3 studypal.py` from your command line.
|
||||
5. While the app is running, go to the `https://<yourdomain>.daily.co/<room_url>` set in `DAILY_SAMPLE_ROOM_URL` and talk to studypal!
|
||||
5
examples/studypal/env.example
Normal file
5
examples/studypal/env.example
Normal file
@@ -0,0 +1,5 @@
|
||||
DAILY_SAMPLE_ROOM_URL= # Follow instructions here and put your https://YOURDOMAIN.daily.co/YOURROOM (Instructions: https://docs.pipecat.ai/quickstart#preparing-your-environment)
|
||||
DAILY_API_KEY= # Create here: https://dashboard.daily.co/developers
|
||||
OPENAI_API_KEY= # Create here: https://platform.openai.com/docs/overview
|
||||
CARTESIA_API_KEY= # Create here: https://play.cartesia.ai/console
|
||||
CARTESIA_VOICE_ID= # Find here: https://play.cartesia.ai/
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user