Compare commits
323 Commits
transcript
...
v0.0.9
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b254525d3c | ||
|
|
6c06fb8169 | ||
|
|
721cd11d62 | ||
|
|
bfbcb9d531 | ||
|
|
724e78c5be | ||
|
|
d3c3d78855 | ||
|
|
8fa9fdcd5a | ||
|
|
7856d20a38 | ||
|
|
6d10027f2d | ||
|
|
bea31215dc | ||
|
|
083480ca1e | ||
|
|
65846330cf | ||
|
|
29f48266f7 | ||
|
|
bfd583211c | ||
|
|
b026915d19 | ||
|
|
4a0836dc8f | ||
|
|
2729c6bf5b | ||
|
|
712a889121 | ||
|
|
2f341e4fb0 | ||
|
|
24198ecf45 | ||
|
|
7e4fefe958 | ||
|
|
e9af39b85f | ||
|
|
38aa3cebb4 | ||
|
|
72724365a0 | ||
|
|
5368462e41 | ||
|
|
1b2b29dd18 | ||
|
|
d2b2b6f619 | ||
|
|
54bcb52129 | ||
|
|
3dc7438bc8 | ||
|
|
523bb9f2a2 | ||
|
|
0c2b3f8b65 | ||
|
|
0b7578056d | ||
|
|
f1b6b9f8e5 | ||
|
|
cbc51babbe | ||
|
|
b0faafc184 | ||
|
|
103092dbb2 | ||
|
|
7b49c9ade3 | ||
|
|
1e83a405c0 | ||
|
|
7336866a1c | ||
|
|
0f23282e30 | ||
|
|
eb3bf117b1 | ||
|
|
e288aa047b | ||
|
|
9a9df35d7b | ||
|
|
af8663e95d | ||
|
|
db05a9b29b | ||
|
|
130e418800 | ||
|
|
1a0a66e503 | ||
|
|
e22babbae2 | ||
|
|
bfe2e0f36e | ||
|
|
26d401e5de | ||
|
|
3c20f9153d | ||
|
|
2f9899af5a | ||
|
|
5ef5cf30f4 | ||
|
|
34a6c5691b | ||
|
|
18bf09c704 | ||
|
|
84cfa7cc95 | ||
|
|
a5eba0106b | ||
|
|
b117a185e3 | ||
|
|
0219230827 | ||
|
|
9fcbb36997 | ||
|
|
0bf15fd6eb | ||
|
|
989252bb52 | ||
|
|
7b44a79a5b | ||
|
|
4bd29b0080 | ||
|
|
ebb76fdae9 | ||
|
|
5d52def0fe | ||
|
|
9ada56d0b0 | ||
|
|
8d73cdb2ee | ||
|
|
4f04b10202 | ||
|
|
97b923e37e | ||
|
|
57aabea0a3 | ||
|
|
319b8e7816 | ||
|
|
96950ca6df | ||
|
|
d7b2e67c35 | ||
|
|
53930b47a5 | ||
|
|
86c8ab02cc | ||
|
|
b678097f6d | ||
|
|
eb455043c4 | ||
|
|
dd696be04c | ||
|
|
96b2337183 | ||
|
|
ea52e73f57 | ||
|
|
88404e4739 | ||
|
|
0fd323714e | ||
|
|
a362ca4d3d | ||
|
|
02b5c3dd5f | ||
|
|
497a09cbc8 | ||
|
|
172a14245d | ||
|
|
302246399b | ||
|
|
9590cc2fbc | ||
|
|
09e4044c72 | ||
|
|
efdfb74dc3 | ||
|
|
158de6f20b | ||
|
|
47f68b742d | ||
|
|
2654ca1f62 | ||
|
|
4263827ee8 | ||
|
|
97fe529b0e | ||
|
|
86025723e7 | ||
|
|
6f4270a552 | ||
|
|
31f050c02b | ||
|
|
a0fe57721b | ||
|
|
abf5e57319 | ||
|
|
44de9007c3 | ||
|
|
46d265514e | ||
|
|
9e64de8606 | ||
|
|
1ea503c1e6 | ||
|
|
d0aeeccb68 | ||
|
|
d687c8cdeb | ||
|
|
951f20c788 | ||
|
|
982c0a0749 | ||
|
|
27cef7cd70 | ||
|
|
03ea208361 | ||
|
|
385b51ac83 | ||
|
|
a37e4fabad | ||
|
|
8bc3c03a69 | ||
|
|
1fc800754b | ||
|
|
18c4bccc13 | ||
|
|
d57d473c13 | ||
|
|
48bb3c6955 | ||
|
|
e3ee3f9cc6 | ||
|
|
3528f5d735 | ||
|
|
23735cb3a3 | ||
|
|
6918dc69f0 | ||
|
|
128d350abc | ||
|
|
2f59e38a7a | ||
|
|
c21014860f | ||
|
|
d4e3e1710f | ||
|
|
e7f9296b5a | ||
|
|
27322108b7 | ||
|
|
22bbedec93 | ||
|
|
ed91bc0f66 | ||
|
|
565acfa9c9 | ||
|
|
a2295b6b1d | ||
|
|
fef1366c84 | ||
|
|
5c0ba1b6f0 | ||
|
|
05c77bce25 | ||
|
|
4ce140bf84 | ||
|
|
a3293c6d7a | ||
|
|
ce04d4a54a | ||
|
|
758ed2d895 | ||
|
|
85cd795b2b | ||
|
|
6c36d5f686 | ||
|
|
b2425d6dcd | ||
|
|
e8a6560ac1 | ||
|
|
78c80d8941 | ||
|
|
2fc5de6afe | ||
|
|
24fb7c5a05 | ||
|
|
5761e23af1 | ||
|
|
960c659d5a | ||
|
|
2bda4c3307 | ||
|
|
2c5628a621 | ||
|
|
9b4cfd9a6c | ||
|
|
8f9aeb0751 | ||
|
|
e8a9d43287 | ||
|
|
cf5d516d51 | ||
|
|
0666dd1194 | ||
|
|
42e25ccd13 | ||
|
|
520cee273f | ||
|
|
a189e2618f | ||
|
|
ae2dcf88ed | ||
|
|
5cdb82ad3c | ||
|
|
593513c84a | ||
|
|
16257f8ec0 | ||
|
|
5fc21a7508 | ||
|
|
cc05429135 | ||
|
|
85e66dddbe | ||
|
|
03ea559839 | ||
|
|
b6c9859e34 | ||
|
|
bc47c909a3 | ||
|
|
428659730d | ||
|
|
a573277a10 | ||
|
|
69c2637a25 | ||
|
|
90c34d278f | ||
|
|
2f4e31d1b2 | ||
|
|
9385270775 | ||
|
|
2914e43350 | ||
|
|
78638d2dba | ||
|
|
141a5bb548 | ||
|
|
3957813202 | ||
|
|
549862ef99 | ||
|
|
1000ca5b55 | ||
|
|
91dbfef4c3 | ||
|
|
3b61d0b41a | ||
|
|
bf3ae091b9 | ||
|
|
34ac796607 | ||
|
|
e0551e9d85 | ||
|
|
b1ab6f91b9 | ||
|
|
58726dc20d | ||
|
|
8e61fe8e36 | ||
|
|
99b836c227 | ||
|
|
1c27f77f1a | ||
|
|
c91fa39a99 | ||
|
|
eacaea7db4 | ||
|
|
c6dfcb6f7a | ||
|
|
18bf26de14 | ||
|
|
b8b35db89c | ||
|
|
358166f347 | ||
|
|
c006c123b2 | ||
|
|
cf302fb765 | ||
|
|
e33820fe36 | ||
|
|
b84b3d59f3 | ||
|
|
7b5b88b99b | ||
|
|
e87196cce7 | ||
|
|
bbfc9e703b | ||
|
|
c21a63d48b | ||
|
|
f546bb32da | ||
|
|
d9378e23ba | ||
|
|
c75a3fb0d0 | ||
|
|
f8ae264957 | ||
|
|
977c12d530 | ||
|
|
61c55d2f47 | ||
|
|
fd2fa23e9c | ||
|
|
de026ccc8a | ||
|
|
c5bb0e14ab | ||
|
|
a4f3c51184 | ||
|
|
7786e685cc | ||
|
|
33793ca9f8 | ||
|
|
d26aede667 | ||
|
|
ad993056d8 | ||
|
|
5b1f26aacb | ||
|
|
4e16e514dd | ||
|
|
959ffa9d36 | ||
|
|
4396b1018a | ||
|
|
37e904ce68 | ||
|
|
ef39d842a5 | ||
|
|
72f631a066 | ||
|
|
5d46302b9e | ||
|
|
8241dc0bed | ||
|
|
95a1efbe75 | ||
|
|
e59df8476e | ||
|
|
824df8ca7c | ||
|
|
0db8a51b27 | ||
|
|
ce9c6ede66 | ||
|
|
192b46bbab | ||
|
|
196279e342 | ||
|
|
edd93bc4cb | ||
|
|
d0076dd4ee | ||
|
|
3c5f4800d4 | ||
|
|
2bcb4966d3 | ||
|
|
b14f08a7d5 | ||
|
|
8fb92e3fd7 | ||
|
|
337ca7f581 | ||
|
|
eb430621f1 | ||
|
|
d5683c4f24 | ||
|
|
b4505b7eff | ||
|
|
3e46d28aff | ||
|
|
d3e76c4fd6 | ||
|
|
62fd371b97 | ||
|
|
b9556716dd | ||
|
|
2708dcf7b5 | ||
|
|
d3f86dab2e | ||
|
|
18e7626b9f | ||
|
|
763a50f8ec | ||
|
|
3b282cc921 | ||
|
|
434772dc23 | ||
|
|
15df4a9d58 | ||
|
|
643be238f9 | ||
|
|
d90fdb1cae | ||
|
|
f710aeae95 | ||
|
|
20091d91c9 | ||
|
|
92ec5641d4 | ||
|
|
53e97bd872 | ||
|
|
dcbd79333a | ||
|
|
97a4cb8b7f | ||
|
|
cc7877f626 | ||
|
|
1992b7e79e | ||
|
|
2516670874 | ||
|
|
4fecc10808 | ||
|
|
08144fc560 | ||
|
|
815aa2bc3e | ||
|
|
560c98f2fa | ||
|
|
0e0c992f59 | ||
|
|
d76139ac1a | ||
|
|
444418d94c | ||
|
|
d27122e35e | ||
|
|
0ae83577c6 | ||
|
|
5c402eee81 | ||
|
|
80750fe022 | ||
|
|
ccfba04ea2 | ||
|
|
5b8198cf9e | ||
|
|
3fa00c4db8 | ||
|
|
4ce36f8c63 | ||
|
|
9620080cc5 | ||
|
|
ee1ce8f288 | ||
|
|
70d07b6ea2 | ||
|
|
9d5ad5675c | ||
|
|
0d96f91cde | ||
|
|
4e9586595d | ||
|
|
d0bcddfd70 | ||
|
|
065a213ebb | ||
|
|
7d6c94d604 | ||
|
|
0859b57b00 | ||
|
|
09838c9b1f | ||
|
|
c39920132c | ||
|
|
860129a4be | ||
|
|
4416f36ae9 | ||
|
|
86af896150 | ||
|
|
5cbac4701b | ||
|
|
5d9aa530e2 | ||
|
|
d4c4d49035 | ||
|
|
e81f247845 | ||
|
|
8baf137511 | ||
|
|
fcceb32bd7 | ||
|
|
ead655fe23 | ||
|
|
bab102f197 | ||
|
|
95fc802607 | ||
|
|
2886997693 | ||
|
|
5fdda43bed | ||
|
|
f0d9b0613e | ||
|
|
a661905d7f | ||
|
|
c9c2e5f561 | ||
|
|
795a339542 | ||
|
|
31db156dfc | ||
|
|
690cf2e47d | ||
|
|
ba89e41c5b | ||
|
|
c134598a77 | ||
|
|
b51abd2969 | ||
|
|
3fda9b0ecb | ||
|
|
95c92e5304 | ||
|
|
b443fbdb60 | ||
|
|
ccd2fa31e5 | ||
|
|
9b65286216 | ||
|
|
6ae733ebfe | ||
|
|
1071dede1a |
30
.dockerignore
Normal file
@@ -0,0 +1,30 @@
|
||||
# flyctl launch added from .gitignore
|
||||
**/.vscode
|
||||
**/env
|
||||
**/__pycache__
|
||||
**/*~
|
||||
**/venv
|
||||
#*#
|
||||
|
||||
# Distribution / packaging
|
||||
**/.Python
|
||||
**/build
|
||||
**/develop-eggs
|
||||
**/dist
|
||||
**/downloads
|
||||
**/eggs
|
||||
**/.eggs
|
||||
**/lib
|
||||
**/lib64
|
||||
**/parts
|
||||
**/sdist
|
||||
**/var
|
||||
**/wheels
|
||||
**/share/python-wheels
|
||||
**/*.egg-info
|
||||
**/.installed.cfg
|
||||
**/*.egg
|
||||
**/MANIFEST
|
||||
**/.DS_Store
|
||||
**/.env
|
||||
fly.toml
|
||||
44
.github/workflows/build.yaml
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
name: build
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
pull_request:
|
||||
branches:
|
||||
- "**"
|
||||
paths-ignore:
|
||||
- "docs/**"
|
||||
|
||||
concurrency:
|
||||
group: build-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: "Build and Install"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python
|
||||
id: setup_python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.10'
|
||||
- name: Setup virtual environment
|
||||
run: |
|
||||
python -m venv .venv
|
||||
- name: Install basic Python dependencies
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r dev-requirements.txt
|
||||
- name: Build project
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m build
|
||||
- name: Install project and other Python dependencies
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
pip install --editable .
|
||||
44
.github/workflows/lint.yaml
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
name: lint
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
pull_request:
|
||||
branches:
|
||||
- "**"
|
||||
paths-ignore:
|
||||
- "docs/**"
|
||||
|
||||
concurrency:
|
||||
group: build-lint-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
autopep8:
|
||||
name: "Formatting lints"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.10'
|
||||
- name: Setup virtual environment
|
||||
run: |
|
||||
python -m venv .venv
|
||||
- name: Install development Python dependencies
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r dev-requirements.txt
|
||||
- name: autopep8
|
||||
id: autopep8
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
autopep8 --max-line-length 100 --exit-code -r -d --exclude "*_pb2.py" -a -a src/
|
||||
- name: Fail if autopep8 requires changes
|
||||
if: steps.autopep8.outputs.exit-code == 2
|
||||
run: exit 1
|
||||
84
.github/workflows/publish.yaml
vendored
Normal file
@@ -0,0 +1,84 @@
|
||||
name: publish
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
gitref:
|
||||
type: string
|
||||
description: "what git ref to build"
|
||||
required: true
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: "Build and upload wheels"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ github.event.inputs.gitref }}
|
||||
- name: Set up Python
|
||||
id: setup_python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.10'
|
||||
- name: Setup virtual environment
|
||||
run: |
|
||||
python -m venv .venv
|
||||
- name: Install basic Python dependencies
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r dev-requirements.txt
|
||||
- name: Build project
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m build
|
||||
- name: Upload wheels
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: wheels
|
||||
path: ./dist
|
||||
|
||||
publish-to-pypi:
|
||||
name: "Publish to PyPI"
|
||||
runs-on: ubuntu-latest
|
||||
needs: [ build ]
|
||||
environment:
|
||||
name: pypi
|
||||
url: https://pypi.org/p/pipecat-ai
|
||||
permissions:
|
||||
id-token: write
|
||||
steps:
|
||||
- name: Download wheels
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: wheels
|
||||
path: ./dist
|
||||
- name: Publish to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
verbose: true
|
||||
print-hash: true
|
||||
|
||||
publish-to-test-pypi:
|
||||
name: "Publish to Test PyPI"
|
||||
runs-on: ubuntu-latest
|
||||
needs: [ build ]
|
||||
environment:
|
||||
name: testpypi
|
||||
url: https://pypi.org/p/pipecat-ai
|
||||
permissions:
|
||||
id-token: write
|
||||
steps:
|
||||
- name: Download wheels
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: wheels
|
||||
path: ./dist
|
||||
- name: Publish to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
verbose: true
|
||||
print-hash: true
|
||||
repository-url: https://test.pypi.org/legacy/
|
||||
63
.github/workflows/publish_test.yaml
vendored
Normal file
@@ -0,0 +1,63 @@
|
||||
name: publish-test
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: "Build and upload wheels"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ github.event.inputs.gitref }}
|
||||
fetch-tags: true
|
||||
fetch-depth: 100
|
||||
- name: Set up Python
|
||||
id: setup_python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.10'
|
||||
- name: Setup virtual environment
|
||||
run: |
|
||||
python -m venv .venv
|
||||
- name: Install basic Python dependencies
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r dev-requirements.txt
|
||||
- name: Build project
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m build
|
||||
- name: Upload wheels
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: wheels
|
||||
path: ./dist
|
||||
|
||||
publish-to-pypi:
|
||||
name: "Publish to Test PyPI"
|
||||
runs-on: ubuntu-latest
|
||||
needs: [ build ]
|
||||
environment:
|
||||
name: testpypi
|
||||
url: https://pypi.org/p/pipecat-ai
|
||||
permissions:
|
||||
id-token: write
|
||||
steps:
|
||||
- name: Download wheels
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: wheels
|
||||
path: ./dist
|
||||
- name: Publish to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
verbose: true
|
||||
print-hash: true
|
||||
repository-url: https://test.pypi.org/legacy/
|
||||
49
.github/workflows/tests.yaml
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
name: test
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
pull_request:
|
||||
branches:
|
||||
- "**"
|
||||
paths-ignore:
|
||||
- "docs/**"
|
||||
|
||||
concurrency:
|
||||
group: build-test-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
test:
|
||||
name: "Unit and Integration Tests"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python
|
||||
id: setup_python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.10'
|
||||
- name: Cache virtual environment
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
# We are hashing requirements-dev.txt and requirements-extra.txt which
|
||||
# contain all dependencies needed to run the tests and examples.
|
||||
key: venv-${{ runner.os }}-${{ steps.setup_python.outputs.python-version}}-${{ hashFiles('linux-py3.10-requirements.txt') }}-${{ hashFiles('dev-requirements.txt') }}
|
||||
path: .venv
|
||||
- name: Install system packages
|
||||
run: sudo apt-get install -y portaudio19-dev
|
||||
- name: Setup virtual environment
|
||||
run: |
|
||||
python -m venv .venv
|
||||
- name: Install basic Python dependencies
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r linux-py3.10-requirements.txt -r dev-requirements.txt
|
||||
- name: Test with pytest
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
pytest --doctest-modules --ignore-glob="*to_be_updated*" src tests
|
||||
3
.gitignore
vendored
@@ -2,6 +2,8 @@
|
||||
env/
|
||||
__pycache__/
|
||||
*~
|
||||
venv
|
||||
.venv
|
||||
#*#
|
||||
|
||||
# Distribution / packaging
|
||||
@@ -25,3 +27,4 @@ share/python-wheels/
|
||||
MANIFEST
|
||||
.DS_Store
|
||||
.env
|
||||
fly.toml
|
||||
40
Dockerfile
Normal file
@@ -0,0 +1,40 @@
|
||||
# setup
|
||||
FROM python:3.11.5
|
||||
|
||||
WORKDIR /app
|
||||
COPY requirements.txt /app
|
||||
COPY *.py /app
|
||||
COPY pyproject.toml /app
|
||||
|
||||
COPY src/ /app/src/
|
||||
COPY examples/ /app/examples/
|
||||
|
||||
WORKDIR /app
|
||||
RUN ls --recursive /app/
|
||||
RUN pip3 install --upgrade -r requirements.txt
|
||||
RUN python -m build .
|
||||
RUN pip3 install .
|
||||
RUN pip3 install gunicorn
|
||||
# If running on Ubuntu, Azure TTS requires some extra config
|
||||
# https://learn.microsoft.com/en-us/azure/ai-services/speech-service/quickstarts/setup-platform?pivots=programming-language-python&tabs=linux%2Cubuntu%2Cdotnetcli%2Cdotnet%2Cjre%2Cmaven%2Cnodejs%2Cmac%2Cpypi
|
||||
|
||||
RUN wget -O - https://www.openssl.org/source/openssl-1.1.1w.tar.gz | tar zxf -
|
||||
WORKDIR openssl-1.1.1w
|
||||
RUN ./config --prefix=/usr/local
|
||||
RUN make -j $(nproc)
|
||||
RUN make install_sw install_ssldirs
|
||||
RUN ldconfig -v
|
||||
ENV SSL_CERT_DIR=/etc/ssl/certs
|
||||
|
||||
#ENV LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
|
||||
RUN apt clean
|
||||
RUN apt-get update
|
||||
RUN apt-get -y install build-essential libssl-dev ca-certificates libasound2 wget
|
||||
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
EXPOSE 8000
|
||||
# run
|
||||
CMD ["gunicorn", "--workers=2", "--log-level", "debug", "--chdir", "examples/server", "--capture-output", "daily-bot-manager:app", "--bind=0.0.0.0:8000"]
|
||||
24
LICENSE
Normal file
@@ -0,0 +1,24 @@
|
||||
BSD 2-Clause License
|
||||
|
||||
Copyright (c) 2024, Daily
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
202
README.md
@@ -1,20 +1,156 @@
|
||||
# dailyai SDK
|
||||
<div align="center">
|
||||
<img alt="pipecat" width="300px" height="auto" src="pipecat.png">
|
||||
</div>
|
||||
|
||||
This SDK can help you build applications that participate in WebRTC meetings and use various AI services to interact with other participants.
|
||||
# Pipecat
|
||||
|
||||
## Build/Install
|
||||
[](https://pypi.org/project/pipecat-ai) [](https://discord.gg/pipecat)
|
||||
|
||||
`pipecat` is a framework for building voice (and multimodal) conversational agents. Things like personal coaches, meeting assistants, story-telling toys for kids, customer support bots, and snarky social companions.
|
||||
|
||||
Build things like this:
|
||||
|
||||
[](https://www.youtube.com/watch?v=lDevgsp9vn0)
|
||||
|
||||
## Getting started with voice agents
|
||||
|
||||
You can get started with Pipecat running on your local machine, then move your agent processes to the cloud when you’re ready. You can also add a telephone number, image output, video input, use different LLMs, and more.
|
||||
|
||||
```shell
|
||||
# install the module
|
||||
pip install pipecat-ai
|
||||
|
||||
# set up an .env file with API keys
|
||||
cp dot-env.template .env
|
||||
```
|
||||
|
||||
By default, in order to minimize dependencies, only the basic framework functionality is available. Some third-party AI services require additional dependencies that you can install with:
|
||||
|
||||
```shell
|
||||
pip install "pipecat-ai[option,...]"
|
||||
```
|
||||
|
||||
Your project may or may not need these, so they're made available as optional requirements. Here is a list:
|
||||
|
||||
- **AI services**: `anthropic`, `azure`, `fal`, `moondream`, `openai`, `playht`, `silero`, `whisper`
|
||||
- **Transports**: `daily`, `local`, `websocket`
|
||||
|
||||
## A simple voice agent running locally
|
||||
|
||||
If you’re doing AI-related stuff, you probably have an OpenAI API key.
|
||||
|
||||
To generate voice output, one service that’s easy to get started with is ElevenLabs. If you don’t already have an ElevenLabs developer account, you can sign up for one [here].
|
||||
|
||||
So let’s run a really simple agent that’s just a GPT-4 prompt, wired up to voice input and speaker output.
|
||||
|
||||
You can change the prompt, in the code. The current prompt is “Tell me something interesting about the Roman Empire.”
|
||||
|
||||
`cd examples/getting-started` to run the following examples …
|
||||
|
||||
```shell
|
||||
# Talk to a local pipecat process with your voice. Specify GPT-4 as the LLM.
|
||||
|
||||
export OPENAI_API_KEY=...
|
||||
export ELEVENLABS_API_KEY=...
|
||||
python ./local-mic.py | ./pipecat-pipes-gpt-4.py | ./local-speaker.py
|
||||
```
|
||||
|
||||
## WebSockets instead of pipes
|
||||
|
||||
To run your agent in the cloud, you can switch the Pipecat transport layer to use a WebSocket instead of Unix pipes.
|
||||
|
||||
```shell
|
||||
# Talk to a local pipecat process with your voice. Specify GPT-4 as the LLM.
|
||||
|
||||
export OPENAI_API_KEY=...
|
||||
export ELEVENLABS_API_KEY=...
|
||||
python ./local-mic-and-speaker-wss.py wss://localhost:8088
|
||||
```
|
||||
|
||||
## WebRTC for production use
|
||||
|
||||
WebSockets are fine for server-to-server communication or for initial development. But for production use, you’ll need client-server audio to use a protocol designed for real-time media transport. (For an explanation of the difference between WebSockets and WebRTC, see [this post.])
|
||||
|
||||
One way to get up and running quickly with WebRTC is to sign up for a Daily developer account. Daily gives you SDKs and global infrastructure for audio (and video) routing. Every account gets 10,000 audio/video/transcription minutes free each month.
|
||||
|
||||
Sign up [here](https://dashboard.daily.co/u/signup) and [create a room](https://docs.daily.co/reference/rest-api/rooms) in the developer Dashboard. Then run the examples, this time connecting via WebRTC instead of a WebSocket.
|
||||
|
||||
```shell
|
||||
# 1. Run the pipecat process. Provide your Daily API key and a Daily room
|
||||
export DAILY_API_KEY=...
|
||||
export OPENAI_API_KEY=...
|
||||
export ELEVENLABS_API_KEY=...
|
||||
python pipecat-daily-gpt-4.py --daily-room https://example.daily.co/pipecat
|
||||
|
||||
# 2. Visit the Daily room link in any web browser to talk to the pipecat process.
|
||||
# You'll want to use a Daily SDK to embed the client-side code into your own
|
||||
# app. But visiting the room URL in a browser is a quick way to start building
|
||||
# agents because you can focus on just the agent code at first.
|
||||
open -a "Google Chrome" https://example.daily.co/pipecat
|
||||
```
|
||||
|
||||
## Deploy your agent to the cloud
|
||||
Now that you’ve decoupled client and server, and have a Pipecat process that can run anywhere you can run Python, you can deploy this example agent to the cloud.
|
||||
|
||||
`TBC`
|
||||
|
||||
## Taking it further
|
||||
|
||||
### Add a telephone number
|
||||
Daily supports telephone connections in addition to WebRTC streams. You can add a telephone number to your Daily room with the following REST API call. Once you’ve done that, you can call your agent on the phone.
|
||||
|
||||
You’ll need to add a credit card to your Daily account to enable telephone numbers.
|
||||
|
||||
`TBC`
|
||||
|
||||
|
||||
### Add image output
|
||||
|
||||
Daily supports telephone connections in addition to WebRTC streams. You can add a telephone number to your Daily room with the following REST API call. Once you’ve done that, you can call your agent on the phone.
|
||||
|
||||
You’ll need to add a credit card to your Daily account to enable telephone numbers.
|
||||
|
||||
`TBC`
|
||||
|
||||
### Add video output
|
||||
|
||||
|
||||
`TBC`
|
||||
|
||||
|
||||
## Code examples
|
||||
|
||||
There are two directories of examples:
|
||||
|
||||
- [foundational](https://github.com/daily-co/pipecat/tree/main/examples/foundational) — examples that build on each other, introducing one or two concepts at a time
|
||||
- [starter apps](https://github.com/daily-co/pipecat/tree/main/examples/starter-apps) — complete applications that you can use as starting points for development
|
||||
|
||||
Before running the examples you need to install the dependencies (which will install all the dependencies to run all of the examples):
|
||||
|
||||
```
|
||||
pip install -r {env}-requirements.txt
|
||||
```
|
||||
|
||||
To run the example below you need to sign up for a [free Daily account](https://dashboard.daily.co/u/signup) and create a Daily room (so you can hear the LLM talking). After that, join the room's URL directly from a browser tab and run:
|
||||
|
||||
```
|
||||
python examples/foundational/02-llm-say-one-thing.py
|
||||
```
|
||||
|
||||
## Hacking on the framework itself
|
||||
|
||||
_Note that you may need to set up a virtual environment before following the instructions below. For instance, you might need to run the following from the root of the repo:_
|
||||
|
||||
```
|
||||
python3 -m venv env
|
||||
source env/bin/activate
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate
|
||||
```
|
||||
|
||||
From the root of this repo, run the following:
|
||||
|
||||
```
|
||||
pip install -r requirements.txt
|
||||
pip install -r dev-requirements.txt -r {env}-requirements.txt
|
||||
python -m build
|
||||
```
|
||||
|
||||
@@ -30,26 +166,54 @@ If you want to use this package from another directory, you can run:
|
||||
pip install path_to_this_repo
|
||||
```
|
||||
|
||||
## Running the samples
|
||||
### Running tests
|
||||
|
||||
Tou can run the simple sample like so:
|
||||
From the root directory, run:
|
||||
|
||||
```
|
||||
python src/samples/theoretical-to-real/01-say-one-thing.py -u <url of your Daily meeting> -k <your Daily API Key>
|
||||
pytest --doctest-modules --ignore-glob="*to_be_updated*" src tests
|
||||
```
|
||||
|
||||
Note that the sample uses Azure's TTS and LLM services. You'll need to set the following environment variables for the sample to work:
|
||||
## Setting up your editor
|
||||
|
||||
```
|
||||
AZURE_SPEECH_SERVICE_KEY
|
||||
AZURE_SPEECH_SERVICE_REGION
|
||||
AZURE_CHATGPT_KEY
|
||||
AZURE_CHATGPT_ENDPOINT
|
||||
AZURE_CHATGPT_DEPLOYMENT_ID
|
||||
This project uses strict [PEP 8](https://peps.python.org/pep-0008/) formatting.
|
||||
|
||||
### Emacs
|
||||
|
||||
You can use [use-package](https://github.com/jwiegley/use-package) to install [py-autopep8](https://codeberg.org/ideasman42/emacs-py-autopep8) package and configure `autopep8` arguments:
|
||||
|
||||
```elisp
|
||||
(use-package py-autopep8
|
||||
:ensure t
|
||||
:defer t
|
||||
:hook ((python-mode . py-autopep8-mode))
|
||||
:config
|
||||
(setq py-autopep8-options '("-a" "-a", "--max-line-length=100")))
|
||||
```
|
||||
|
||||
If you have those environment variables stored in an .env file, you can quickly load them into your terminal's environment by running this:
|
||||
`autopep8` was installed in the `venv` environment described before, so you should be able to use [pyvenv-auto](https://github.com/ryotaro612/pyvenv-auto) to automatically load that environment inside Emacs.
|
||||
|
||||
```elisp
|
||||
(use-package pyvenv-auto
|
||||
:ensure t
|
||||
:defer t
|
||||
:hook ((python-mode . pyvenv-auto-run)))
|
||||
|
||||
```bash
|
||||
export $(grep -v '^#' .env | xargs)
|
||||
```
|
||||
|
||||
### Visual Studio Code
|
||||
|
||||
Install the
|
||||
[autopep8](https://marketplace.visualstudio.com/items?itemName=ms-python.autopep8) extension. Then edit the user settings (_Ctrl-Shift-P_ `Open User Settings (JSON)`) and set it as the default Python formatter, enable formatting on save and configure `autopep8` arguments:
|
||||
|
||||
```json
|
||||
"[python]": {
|
||||
"editor.defaultFormatter": "ms-python.autopep8",
|
||||
"editor.formatOnSave": true
|
||||
},
|
||||
"autopep8.args": [
|
||||
"-a",
|
||||
"-a",
|
||||
"--max-line-length=100"
|
||||
],
|
||||
```
|
||||
|
||||
6
dev-requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
autopep8~=2.1.0
|
||||
build~=1.2.1
|
||||
pip-tools~=7.4.1
|
||||
pytest~=8.2.0
|
||||
setuptools~=69.5.1
|
||||
setuptools_scm~=8.1.0
|
||||
17
docs/README.md
Normal file
@@ -0,0 +1,17 @@
|
||||
# Pipecat Docs
|
||||
|
||||
## [Architecture Overview](architecture.md)
|
||||
|
||||
Learn about the thinking behind the framework's design.
|
||||
|
||||
## [A Frame's Progress](frame-progress.md)
|
||||
|
||||
See how a Frame is processed through a Transport, a Pipeline, and a series of Frame Processors.
|
||||
|
||||
## [Example Code](examples/)
|
||||
|
||||
The repository includes several example apps in the `examples` directory. The docs explain how they work.
|
||||
|
||||
## [API Reference](api/)
|
||||
|
||||
Complete documentation of the available classes and methods in the SDK.
|
||||
17
docs/architecture.md
Normal file
@@ -0,0 +1,17 @@
|
||||
# Pipecat architecture guide
|
||||
|
||||
## Frames
|
||||
|
||||
Frames can represent discrete chunks of data, for instance a chunk of text, a chunk of audio, or an image. They can also be used to as control flow, for instance a frame that indicates that there is no more data available, or that a user started or stopped talking. They can also represent more complex data structures, such as a message array used for an LLM completion.
|
||||
|
||||
## FrameProcessors
|
||||
|
||||
Frame processors operate on frames. Every frame processor implements a `process_frame` method that consumes one frame and produces zero or more frames. Frame processors can do simple transforms, such as concatenating text fragments into sentences, or they can treat frames as input for an AI Service, and emit chat completions based on message arrays or transform text into audio or images.
|
||||
|
||||
## Pipelines
|
||||
|
||||
Pipelines are lists of frame processors linked together. Frame processors can push frames upstream or downstream to their peers. A very simple pipeline might chain an LLM frame processor to a text-to-speech frame processor, with a transport as an output.
|
||||
|
||||
## Transports
|
||||
|
||||
Transports provide input and output frame processors to receive or send frames respectively. For example, the `DailyTransport` does this with a WebRTC session joined to a Daily.co room.
|
||||
119
docs/examples/01-say-one-thing.md
Normal file
@@ -0,0 +1,119 @@
|
||||
# 01: Say One Thing
|
||||
|
||||
_video here - youtube?_
|
||||
|
||||
This example uses a text-to-speech (TTS) service to say one predefined sentence. But first, a quick overview of the general structure of these examples.
|
||||
|
||||
## Running the demos
|
||||
|
||||
All of the demos have something like this at the bottom of the file:
|
||||
|
||||
```python
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
```
|
||||
|
||||
### `configure()`
|
||||
|
||||
The `configure()` function comes from `examples/foundational/support/runner.py`, and it allows you to configure the examples from the command line directly, or using environment variables:
|
||||
|
||||
```bash
|
||||
python 01-say-one-thing.py -u https://YOUR_DOMAIN.daily.co/YOUR_ROOM -k YOUR_API_KEY
|
||||
# or
|
||||
DAILY_ROOM_URL=https://YOUR_DOMAIN.daily.co/YOUR_ROOM DAILY_API_KEY=YOUR_API_KEY python 01-say-one-thing.py
|
||||
# or set DAILY_ROOM_URL and DAILY_API_KEY in a .env file
|
||||
python 01-say-one-thing.py
|
||||
```
|
||||
|
||||
You'll need a Daily account to run these demos. You can sign up for free at [daily.co](https://daily.co). Once you've signed up you can create a room from the [Dashboard](https://dashboard.daily.co/rooms), and grab [your API key](https://dashboard.daily.co/developers) while you're there.
|
||||
|
||||
Some functionality (such as transcription) requires the bot to have owner privileges in the room. `runner.py` uses the Daily REST API to create a meeting token with owner privileges. You can learn more about meeting tokens in the [Daily docs](https://docs.daily.co/reference/rest-api/meeting-tokens).
|
||||
|
||||
### `asyncio.run()`
|
||||
|
||||
The AI SDK makes heavy use of Python's `asyncio` module. [This is a reasonable intro to the topic](https://builtin.com/data-science/asyncio) if you haven't worked with `asyncio` and coroutines before.
|
||||
|
||||
You can learn a bit more about the specifics of how the Daily AI SDK uses coroutines in the [Architecture Guide](../architecture.md).
|
||||
|
||||
## The `main()` function
|
||||
|
||||
All of the examples have a `main()` function with a similar structure:
|
||||
|
||||
- Configure the transport
|
||||
- Configure the AI service(s) used in the demo
|
||||
- Configure any event listeners
|
||||
- Define a processing pipeline
|
||||
- Run the example's coroutine(s)
|
||||
|
||||
### Configuring the transport
|
||||
|
||||
The first section of the `main()` function configures the transport object:
|
||||
|
||||
```python
|
||||
meeting_duration_minutes = 5
|
||||
transport = DailyTransportService(
|
||||
room_url,
|
||||
None,
|
||||
"Say One Thing",
|
||||
meeting_duration_minutes,
|
||||
)
|
||||
transport.mic_enabled = True
|
||||
```
|
||||
|
||||
The [Architecture Guide](../architecture.md) explains the transport object in more detail. In this case, we're configuring a Daily transport object and enabling the virtual microphone, so our bot can play audio.
|
||||
|
||||
### Configuring the services
|
||||
|
||||
As described in the [Architecture Guide](../architecture.md), 'a 'Service' is a class that processes 'Frames' as part of a 'Pipeline'. In this demo app, we'll only need one service: a text-to-speech generator. We can create an instance of the `ElevenLabsTTSService` class with this line of code:
|
||||
|
||||
```python
|
||||
tts = ElevenLabsTTSService(aiohttp_session=session, api_key=os.getenv("ELEVENLABS_API_KEY"), voice_id=os.getenv("ELEVENLABS_VOICE_ID"))
|
||||
```
|
||||
|
||||
You'll need to make sure and set those environment variables somewhere. The easiest way to do that is to copy the `example.env` file in the repo and rename it to `.env`, and then add your credentials to that file. `runner.py` loads the `python-dotenv` module and initializes it, making the values in that file available in the environment.
|
||||
|
||||
### Configuring event listeners
|
||||
|
||||
This part isn't strictly necessary for an app like this. You could include the contents of the `on_participant_joined` function directly in the body of the `main()` function, and it would run as soon as you started the script from the command line.
|
||||
|
||||
Instead, we can use an event handler to wait to run that code until someone else joins the meeting. We'll define a function called `greet_user()`, and use the `@transport.event_handler("on_participant_joined")` decorator to tell the SDK that we want to run that function whenever a user joins the room.
|
||||
|
||||
```python
|
||||
@transport.event_handler("on_participant_joined")
|
||||
async def greet_user(transport, participant):
|
||||
if participant["info"]["isLocal"]:
|
||||
return
|
||||
|
||||
await tts.say(
|
||||
"Hello there, " + participant["info"]["userName"] + "!",
|
||||
transport.send_queue,
|
||||
)
|
||||
|
||||
# wait for the output queue to be empty, then leave the meeting
|
||||
await transport.stop_when_done()
|
||||
```
|
||||
|
||||
### Defining a processing pipeline
|
||||
|
||||
In this example, we don't actually have much of a processing pipeline! In fact, we're doing the whole thing inside the `greet_user()` function already.
|
||||
|
||||
Pipelines usually look like a bunch of nested calls to the `run()` or `run_to_queue()` function from different Services. In this example, we're using the `say()` function from the TTS service. This is effectively a convenience wrapper around the `run_to_queue()` function, which we'll discuss more later. It's important to `await` this function to ensure that the speech frames are queued for playback before the next line of code, because of the `stop_when_done()` function being called immediately afterward.
|
||||
|
||||
The output of the `say()` function goes to the transport's `send_queue`. This queue is the all-important connection between the world of the Services pipeline that's generating frames asynchronously and the ordered playback of audio and visual media in the WebRTC call.
|
||||
|
||||
### Running the coroutines
|
||||
|
||||
In this example, we don't actually have any separate processing pipelines—everything happens as a result of an event from the transport. So we only need to run the transport's coroutine, and await its completion:
|
||||
|
||||
```python
|
||||
await transport.run()
|
||||
```
|
||||
|
||||
In future examples, we'll run more processes in parallel. For now, this script can run until the transport exits—which will happen based on calling `stop_when_done()` in the `greet_user()` function.
|
||||
|
||||
## Next Steps
|
||||
|
||||
Next, we'll start connecting multiple AI services together by building a service pipeline.
|
||||
|
||||
## [02 - LLM Say One Thing »](02-llm-say-one-thing.md)
|
||||
5
docs/examples/README.md
Normal file
@@ -0,0 +1,5 @@
|
||||
# Daily AI SDK Examples
|
||||
|
||||
The docs in this folder pair with the example apps located in `examples/foundational`. They are designed to serve as a quick references for building different kinds of AI apps. But the examples also build on one another, so it can be really helpful to walk through them in order.
|
||||
|
||||
To start, you can learn about the overall structure of the examples in [01 - Say One Thing](01-say-one-thing.md).
|
||||
46
docs/frame-progress.md
Normal file
@@ -0,0 +1,46 @@
|
||||
# A Frame's Progress
|
||||
|
||||
1. A user says “Hello, LLM” and the cloud transcription service delivers a transcription to the Transport.
|
||||

|
||||
|
||||
2. The Transport places a Transcription frame in the Pipeline’s source queue.
|
||||

|
||||
|
||||
3. The Pipeline passes the Transcription frame to the first Frame Processor in its list, the LLM User Message Aggregator.
|
||||

|
||||
|
||||
4. The LLM User Message Aggregator updates the LLM Context with a `{“user”: “Hello LLM”}` message.
|
||||

|
||||
|
||||
5. The LLM User Message Aggregator yields an LLM Message Frame, containing the updated LLM Context. The Pipeline passes this frame to the LLM Frame Processor.
|
||||

|
||||
|
||||
6. The LLM Frame Processor creates a streaming chat completion based on the LLM context and yields the first chunk of a response, Text Frame with the value “Hi, “. The Pipeline passes this frame to the TTS Frame Processor. The TTS Frame Processor aggregates this response but doesn’t yield anything, yet, because it’s waiting for a full sentence.
|
||||

|
||||
|
||||
7. The LLM Frame Processor yields another Text Frame with the value “there.”. The Pipeline passes this frame to the TTS Frame Processor.
|
||||

|
||||
|
||||
8. The TTS Frame Processor now has a full sentence, so it starts streaming audio based on “Hi, there.” It yields the first chunk of streaming audio as an Audio frame, which the Pipeline passes to the LLM Assistant Message Aggregator.
|
||||

|
||||
|
||||
9. The LLM Assistant Message Aggregator doesn’t do anything with Audio frames, so it immediately yields the frame, unchanged. This is the convention for all Frame Processors: frames that the processor doesn’t process should be immediately yielded.
|
||||

|
||||
|
||||
10. The Pipeline places the first Audio frame in its sink queue, which is being watched by the Transport. Since the frame is now in a queue, the Pipeline can continue processing other frames. Note that the source and sink queues form a sort of “boundary of concurrent processing” between a Pipeline and the outside world. In a Pipeline, Frames are processed sequentially; once a Frame is on a queue it can be processed in parallel with the frames being processed by the Pipeline. TODO: link to a more in-depth section about this.
|
||||

|
||||
|
||||
11. The TTS Frame Processor yields another Audio frame as the Transport transmits the first Audio frame.
|
||||

|
||||
|
||||
12. As before, the LLM Assistant Message Aggregator immediately yields the Audio frame and the Pipeline places the Audio frame in the sink queue.
|
||||

|
||||
|
||||
13. The TTS Frame Processor has no more frames to yield. The LLM Frame Processor emits an LLM Response End Frame, which the Pipeline passes to the TTS Frame Processor.
|
||||

|
||||
|
||||
14. The TTS Frame Processor immediately yields the LLM Response End Frame, so the Pipeline passes it along to the LLM Assistant Message Aggregator. The LLM Assistant Message Aggregator updates the LLM Context with the full response from the LLM. TODO TODO: I realized I forgot that the TSS Frame Processor also yields the Text frames that the LLM emitted so that the LLM Assistant Message Aggregator could accumulate them, arrggh.
|
||||

|
||||
|
||||
15. The system is quiet, and waiting for the next message from the Transport.
|
||||

|
||||
BIN
docs/images/frame-progress-01.png
Normal file
|
After Width: | Height: | Size: 98 KiB |
BIN
docs/images/frame-progress-02.png
Normal file
|
After Width: | Height: | Size: 91 KiB |
BIN
docs/images/frame-progress-03.png
Normal file
|
After Width: | Height: | Size: 92 KiB |
BIN
docs/images/frame-progress-04.png
Normal file
|
After Width: | Height: | Size: 92 KiB |
BIN
docs/images/frame-progress-05.png
Normal file
|
After Width: | Height: | Size: 98 KiB |
BIN
docs/images/frame-progress-06.png
Normal file
|
After Width: | Height: | Size: 94 KiB |
BIN
docs/images/frame-progress-07.png
Normal file
|
After Width: | Height: | Size: 94 KiB |
BIN
docs/images/frame-progress-08.png
Normal file
|
After Width: | Height: | Size: 95 KiB |
BIN
docs/images/frame-progress-09.png
Normal file
|
After Width: | Height: | Size: 94 KiB |
BIN
docs/images/frame-progress-10.png
Normal file
|
After Width: | Height: | Size: 96 KiB |
BIN
docs/images/frame-progress-11.png
Normal file
|
After Width: | Height: | Size: 110 KiB |
BIN
docs/images/frame-progress-12.png
Normal file
|
After Width: | Height: | Size: 102 KiB |
BIN
docs/images/frame-progress-13.png
Normal file
|
After Width: | Height: | Size: 111 KiB |
BIN
docs/images/frame-progress-14.png
Normal file
|
After Width: | Height: | Size: 117 KiB |
BIN
docs/images/frame-progress-15.png
Normal file
|
After Width: | Height: | Size: 98 KiB |
35
dot-env.template
Normal file
@@ -0,0 +1,35 @@
|
||||
# Anthropic
|
||||
ANTHROPIC_API_KEY=...
|
||||
|
||||
# Azure
|
||||
AZURE_SPEECH_REGION=...
|
||||
AZURE_SPEECH_API_KEY=...
|
||||
|
||||
AZURE_CHATGPT_API_KEY=...
|
||||
AZURE_CHATGPT_ENDPOINT=https://...
|
||||
AZURE_CHATGPT_MODEL=...
|
||||
|
||||
AZURE_DALLE_API_KEY=...
|
||||
AZURE_DALLE_ENDPOINT=https://...
|
||||
AZURE_DALLE_MODEL=...
|
||||
|
||||
# Daily
|
||||
DAILY_API_KEY=...
|
||||
DAILY_SAMPLE_ROOM_URL=https://...
|
||||
|
||||
# ElevenLabs
|
||||
ELEVENLABS_API_KEY=...
|
||||
ELEVENLABS_VOICE_ID=...
|
||||
|
||||
# Fal
|
||||
FAL_KEY=...
|
||||
|
||||
# Fireworks
|
||||
FIREWORKS_API_KEY=...
|
||||
|
||||
# PlayHT
|
||||
PLAY_HT_USER_ID=...
|
||||
PLAY_HT_API_KEY=...
|
||||
|
||||
# OpenAI
|
||||
OPENAI_API_KEY=...
|
||||
56
examples/foundational/01-say-one-thing.py
Normal file
@@ -0,0 +1,56 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import EndFrame, TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Say One Thing", DailyParams(audio_out_enabled=True))
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(Pipeline([tts, transport.output()]))
|
||||
|
||||
# Register an event handler so we can play the audio when the
|
||||
# participant joins.
|
||||
@transport.event_handler("on_participant_joined")
|
||||
async def on_new_participant_joined(transport, participant):
|
||||
participant_name = participant["info"]["userName"] or ''
|
||||
await task.queue_frames([TextFrame(f"Hello there, {participant_name}!"), EndFrame()])
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
53
examples/foundational/01a-local-audio.py
Normal file
@@ -0,0 +1,53 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import EndFrame, TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.audio import LocalAudioTransport
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = LocalAudioTransport(TransportParams(audio_out_enabled=True))
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
)
|
||||
|
||||
pipeline = Pipeline([tts, transport.output()])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
async def say_something():
|
||||
await asyncio.sleep(1)
|
||||
await task.queue_frames([TextFrame("Hello there!"), EndFrame()])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await asyncio.gather(runner.run(task), say_something())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
68
examples/foundational/02-llm-say-one-thing.py
Normal file
@@ -0,0 +1,68 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import EndFrame, LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
"Say One Thing From an LLM",
|
||||
DailyParams(audio_out_enabled=True))
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4-turbo-preview")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are an LLM in a WebRTC session, and this is a 'hello world' demo. Say hello to the world.",
|
||||
}]
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(Pipeline([llm, tts, transport.output()]))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await task.queue_frames([LLMMessagesFrame(messages), EndFrame()])
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
68
examples/foundational/03-still-frame.py
Normal file
@@ -0,0 +1,68 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
"Show a still frame image",
|
||||
DailyParams(
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=1024
|
||||
)
|
||||
)
|
||||
|
||||
imagegen = FalImageGenService(
|
||||
params=FalImageGenService.InputParams(
|
||||
image_size="square_hd"
|
||||
),
|
||||
aiohttp_session=session,
|
||||
key=os.getenv("FAL_KEY"),
|
||||
)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(Pipeline([imagegen, transport.output()]))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
# Note that we do not put an EndFrame() item in the pipeline for this demo.
|
||||
# This means that the bot will stay in the channel until it times out.
|
||||
# An EndFrame() in the pipeline would cause the transport to shut
|
||||
# down.
|
||||
await task.queue_frames([TextFrame("a cat in the style of picasso")])
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
68
examples/foundational/03a-local-still-frame.py
Normal file
@@ -0,0 +1,68 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
import tkinter as tk
|
||||
|
||||
from pipecat.frames.frames import TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.tk import TkLocalTransport
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
tk_root = tk.Tk()
|
||||
tk_root.title("Picasso Cat")
|
||||
|
||||
transport = TkLocalTransport(
|
||||
tk_root,
|
||||
TransportParams(
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=1024))
|
||||
|
||||
imagegen = FalImageGenService(
|
||||
params=FalImageGenService.InputParams(
|
||||
image_size="square_hd"
|
||||
),
|
||||
aiohttp_session=session,
|
||||
key=os.getenv("FAL_KEY"),
|
||||
)
|
||||
|
||||
pipeline = Pipeline([imagegen, transport.output()])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
await task.queue_frames([TextFrame("a cat in the style of picasso")])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
async def run_tk():
|
||||
while runner.is_active():
|
||||
tk_root.update()
|
||||
tk_root.update_idletasks()
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
await asyncio.gather(runner.run(task), run_tk())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
86
examples/foundational/04-utterance-and-speech.py
Normal file
@@ -0,0 +1,86 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.pipeline.merge_pipeline import SequentialMergePipeline
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
|
||||
from pipecat.frames.frames import EndPipeFrame, LLMMessagesFrame, TextFrame
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.azure import AzureLLMService, AzureTTSService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.transport_services import TransportServiceOutput
|
||||
from pipecat.services.transports.daily_transport import DailyTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = DailyTransport(room_url, None, "Static And Dynamic Speech")
|
||||
|
||||
meeting = TransportServiceOutput(transport, mic_enabled=True)
|
||||
|
||||
llm = AzureLLMService(
|
||||
api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
|
||||
endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
|
||||
model=os.getenv("AZURE_CHATGPT_MODEL"),
|
||||
)
|
||||
azure_tts = AzureTTSService(
|
||||
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
|
||||
region=os.getenv("AZURE_SPEECH_REGION"),
|
||||
)
|
||||
|
||||
elevenlabs_tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
)
|
||||
|
||||
messages = [{"role": "system",
|
||||
"content": "tell the user a joke about llamas"}]
|
||||
|
||||
# Start a task to run the LLM to create a joke, and convert the LLM
|
||||
# output to audio frames. This task will run in parallel with generating
|
||||
# and speaking the audio for static text, so there's no delay to speak
|
||||
# the LLM response.
|
||||
llm_pipeline = Pipeline([llm, elevenlabs_tts])
|
||||
llm_task = PipelineTask(llm_pipeline)
|
||||
await llm_task.queue_frames([LLMMessagesFrame(messages), EndPipeFrame()])
|
||||
|
||||
simple_tts_pipeline = Pipeline([azure_tts])
|
||||
await simple_tts_pipeline.queue_frames(
|
||||
[
|
||||
TextFrame("My friend the LLM is going to tell a joke about llamas."),
|
||||
EndPipeFrame(),
|
||||
]
|
||||
)
|
||||
|
||||
merge_pipeline = SequentialMergePipeline(
|
||||
[simple_tts_pipeline, llm_pipeline])
|
||||
|
||||
await asyncio.gather(
|
||||
transport.run(merge_pipeline),
|
||||
simple_tts_pipeline.run_pipeline(),
|
||||
llm_pipeline.run_pipeline(),
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
164
examples/foundational/05-sync-speech-and-image.py
Normal file
@@ -0,0 +1,164 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
AppFrame,
|
||||
Frame,
|
||||
ImageRawFrame,
|
||||
TextFrame,
|
||||
EndFrame,
|
||||
LLMMessagesFrame,
|
||||
LLMResponseStartFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.aggregators.gated import GatedAggregator
|
||||
from pipecat.processors.aggregators.llm_response import LLMFullResponseAggregator
|
||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||
from pipecat.processors.aggregators.parallel_task import ParallelTask
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
@dataclass
|
||||
class MonthFrame(AppFrame):
|
||||
month: str
|
||||
|
||||
def __str__(self):
|
||||
return f"{self.name}(month: {self.month})"
|
||||
|
||||
|
||||
class MonthPrepender(FrameProcessor):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.most_recent_month = "Placeholder, month frame not yet received"
|
||||
self.prepend_to_next_text_frame = False
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
if isinstance(frame, MonthFrame):
|
||||
self.most_recent_month = frame.month
|
||||
elif self.prepend_to_next_text_frame and isinstance(frame, TextFrame):
|
||||
await self.push_frame(TextFrame(f"{self.most_recent_month}: {frame.text}"))
|
||||
self.prepend_to_next_text_frame = False
|
||||
elif isinstance(frame, LLMResponseStartFrame):
|
||||
self.prepend_to_next_text_frame = True
|
||||
await self.push_frame(frame)
|
||||
else:
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
"Month Narration Bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=1024
|
||||
)
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4-turbo-preview")
|
||||
|
||||
imagegen = FalImageGenService(
|
||||
params=FalImageGenService.InputParams(
|
||||
image_size="square_hd"
|
||||
),
|
||||
aiohttp_session=session,
|
||||
key=os.getenv("FAL_KEY"),
|
||||
)
|
||||
|
||||
gated_aggregator = GatedAggregator(
|
||||
gate_open_fn=lambda frame: isinstance(frame, ImageRawFrame),
|
||||
gate_close_fn=lambda frame: isinstance(frame, LLMResponseStartFrame),
|
||||
start_open=False
|
||||
)
|
||||
|
||||
sentence_aggregator = SentenceAggregator()
|
||||
month_prepender = MonthPrepender()
|
||||
llm_full_response_aggregator = LLMFullResponseAggregator()
|
||||
|
||||
pipeline = Pipeline([
|
||||
llm,
|
||||
sentence_aggregator,
|
||||
ParallelTask(
|
||||
[month_prepender, tts],
|
||||
[llm_full_response_aggregator, imagegen]
|
||||
),
|
||||
gated_aggregator,
|
||||
transport.output()
|
||||
])
|
||||
|
||||
frames = []
|
||||
for month in [
|
||||
"January",
|
||||
"February",
|
||||
"March",
|
||||
"April",
|
||||
"May",
|
||||
"June",
|
||||
"July",
|
||||
"August",
|
||||
"September",
|
||||
"October",
|
||||
"November",
|
||||
"December",
|
||||
]:
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": f"Describe a nature photograph suitable for use in a calendar, for the month of {month}. Include only the image description with no preamble. Limit the description to one sentence, please.",
|
||||
}
|
||||
]
|
||||
frames.append(MonthFrame(month=month))
|
||||
frames.append(LLMMessagesFrame(messages))
|
||||
|
||||
frames.append(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
await task.queue_frames(frames)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
164
examples/foundational/05a-local-sync-speech-and-image.py
Normal file
@@ -0,0 +1,164 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import tkinter as tk
|
||||
|
||||
from pipecat.frames.frames import AudioRawFrame, Frame, URLImageRawFrame, LLMMessagesFrame, TextFrame
|
||||
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import LLMFullResponseAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.tk import TkLocalTransport
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
tk_root = tk.Tk()
|
||||
tk_root.title("Calendar")
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
async def get_month_data(month):
|
||||
messages = [{"role": "system", "content": f"Describe a nature photograph suitable for use in a calendar, for the month of {month}. Include only the image description with no preamble. Limit the description to one sentence, please.", }]
|
||||
|
||||
class ImageDescription(FrameProcessor):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.text = ""
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
if isinstance(frame, TextFrame):
|
||||
self.text = frame.text
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
class AudioGrabber(FrameProcessor):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.audio = bytearray()
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
if isinstance(frame, AudioRawFrame):
|
||||
self.audio.extend(frame.audio)
|
||||
self.frame = AudioRawFrame(
|
||||
bytes(self.audio), frame.sample_rate, frame.num_channels)
|
||||
|
||||
class ImageGrabber(FrameProcessor):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.frame = None
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
if isinstance(frame, URLImageRawFrame):
|
||||
self.frame = frame
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4-turbo-preview")
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"))
|
||||
|
||||
imagegen = FalImageGenService(
|
||||
params=FalImageGenService.InputParams(
|
||||
image_size="square_hd"
|
||||
),
|
||||
aiohttp_session=session,
|
||||
key=os.getenv("FAL_KEY"))
|
||||
|
||||
aggregator = LLMFullResponseAggregator()
|
||||
|
||||
description = ImageDescription()
|
||||
|
||||
audio_grabber = AudioGrabber()
|
||||
|
||||
image_grabber = ImageGrabber()
|
||||
|
||||
pipeline = Pipeline([llm, aggregator, description,
|
||||
ParallelPipeline([tts, audio_grabber],
|
||||
[imagegen, image_grabber])])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
await task.queue_frame(LLMMessagesFrame(messages))
|
||||
await task.stop_when_done()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
return {
|
||||
"month": month,
|
||||
"text": description.text,
|
||||
"image": image_grabber.frame,
|
||||
"audio": audio_grabber.frame,
|
||||
}
|
||||
|
||||
transport = TkLocalTransport(
|
||||
tk_root,
|
||||
TransportParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=1024))
|
||||
|
||||
pipeline = Pipeline([transport.output()])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
# We only specify 5 months as we create tasks all at once and we might
|
||||
# get rate limited otherwise.
|
||||
months: list[str] = [
|
||||
"January",
|
||||
"February",
|
||||
# "March",
|
||||
# "April",
|
||||
# "May",
|
||||
]
|
||||
|
||||
# We create one task per month. This will be executed concurrently.
|
||||
month_tasks = [asyncio.create_task(get_month_data(month)) for month in months]
|
||||
|
||||
# Now we wait for each month task in the order they're completed. The
|
||||
# benefit is we'll have as little delay as possible before the first
|
||||
# month, and likely no delay between months, but the months won't
|
||||
# display in order.
|
||||
async def show_images(month_tasks):
|
||||
for month_data_task in asyncio.as_completed(month_tasks):
|
||||
data = await month_data_task
|
||||
await task.queue_frames([data["image"], data["audio"]])
|
||||
|
||||
await runner.stop_when_done()
|
||||
|
||||
async def run_tk():
|
||||
while True:
|
||||
tk_root.update()
|
||||
tk_root.update_idletasks()
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
await asyncio.gather(runner.run(task), show_images(month_tasks), run_tk())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
94
examples/foundational/06-listen-and-respond.py
Normal file
@@ -0,0 +1,94 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.logger import FrameLogger
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVAD
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_in_enabled=True, # This is so Silero VAD can get audio data
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True
|
||||
)
|
||||
)
|
||||
|
||||
vad = SileroVAD()
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4-turbo-preview")
|
||||
|
||||
fl_in = FrameLogger("Inner")
|
||||
fl_out = FrameLogger("Outer")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so it should not contain special characters. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([fl_in, transport.input(), vad, tma_in, llm,
|
||||
fl_out, tts, tma_out, transport.output()])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
116
examples/foundational/06a-image-sync.py
Normal file
@@ -0,0 +1,116 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from PIL import Image
|
||||
|
||||
from pipecat.frames.frames import ImageRawFrame, Frame, SystemFrame, TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import (
|
||||
LLMAssistantContextAggregator,
|
||||
LLMUserContextAggregator,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.transports.services.daily import DailyTransport
|
||||
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class ImageSyncAggregator(FrameProcessor):
|
||||
def __init__(self, speaking_path: str, waiting_path: str):
|
||||
super().__init__()
|
||||
self._speaking_image = Image.open(speaking_path)
|
||||
self._speaking_image_format = self._speaking_image.format
|
||||
self._speaking_image_bytes = self._speaking_image.tobytes()
|
||||
|
||||
self._waiting_image = Image.open(waiting_path)
|
||||
self._waiting_image_format = self._waiting_image.format
|
||||
self._waiting_image_bytes = self._waiting_image.tobytes()
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
if not isinstance(frame, SystemFrame):
|
||||
await self.push_frame(ImageRawFrame(image=self._speaking_image_bytes, size=(1024, 1024), format=self._speaking_image_format))
|
||||
await self.push_frame(frame)
|
||||
await self.push_frame(ImageRawFrame(image=self._waiting_image_bytes, size=(1024, 1024), format=self._waiting_image_format))
|
||||
else:
|
||||
await self.push_frame(frame)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=1024,
|
||||
transcription_enabled=True
|
||||
)
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4-turbo-preview")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so it should not contain special characters. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserContextAggregator(messages)
|
||||
tma_out = LLMAssistantContextAggregator(messages)
|
||||
|
||||
image_sync_aggregator = ImageSyncAggregator(
|
||||
os.path.join(os.path.dirname(__file__), "assets", "speaking.png"),
|
||||
os.path.join(os.path.dirname(__file__), "assets", "waiting.png"),
|
||||
)
|
||||
|
||||
pipeline = Pipeline([transport.input(), image_sync_aggregator,
|
||||
tma_in, llm, tma_out, tts, transport.output()])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
participant_name = participant["info"]["userName"] or ''
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([TextFrame(f"Hi, this is {participant_name}.")])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
75
examples/foundational/07-interruptible.py
Normal file
@@ -0,0 +1,75 @@
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import logging
|
||||
import os
|
||||
from pipecat.pipeline.aggregators import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.services.ai_services import FrameLogger
|
||||
from pipecat.transports.daily_transport import DailyTransport
|
||||
from pipecat.services.open_ai_services import OpenAILLMService
|
||||
from pipecat.services.elevenlabs_ai_services import ElevenLabsTTSService
|
||||
|
||||
from runner import configure
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s")
|
||||
logger = logging.getLogger("pipecat")
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
duration_minutes=5,
|
||||
start_transcription=True,
|
||||
mic_enabled=True,
|
||||
mic_sample_rate=16000,
|
||||
camera_enabled=False,
|
||||
vad_enabled=True,
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4-turbo-preview")
|
||||
|
||||
pipeline = Pipeline([FrameLogger(), llm, FrameLogger(), tts])
|
||||
|
||||
@transport.event_handler("on_first_other_participant_joined")
|
||||
async def on_first_other_participant_joined(transport, participant):
|
||||
await transport.say("Hi, I'm listening!", tts)
|
||||
|
||||
async def run_conversation():
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
await transport.run_interruptible_pipeline(
|
||||
pipeline,
|
||||
post_processor=LLMAssistantResponseAggregator(messages),
|
||||
pre_processor=LLMUserResponseAggregator(messages),
|
||||
)
|
||||
|
||||
await asyncio.gather(transport.run(), run_conversation())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
148
examples/foundational/08-bots-arguing.py
Normal file
@@ -0,0 +1,148 @@
|
||||
from typing import Tuple
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from pipecat.pipeline.aggregators import SentenceAggregator
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
|
||||
from pipecat.transports.daily_transport import DailyTransport
|
||||
from pipecat.services.azure_ai_services import AzureLLMService, AzureTTSService
|
||||
from pipecat.services.elevenlabs_ai_services import ElevenLabsTTSService
|
||||
from pipecat.services.fal_ai_services import FalImageGenService
|
||||
from pipecat.pipeline.frames import AudioFrame, EndFrame, ImageFrame, LLMMessagesFrame, TextFrame
|
||||
|
||||
from runner import configure
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s")
|
||||
logger = logging.getLogger("pipecat")
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
|
||||
async def main(room_url: str):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
"Respond bot",
|
||||
duration_minutes=10,
|
||||
mic_enabled=True,
|
||||
mic_sample_rate=16000,
|
||||
camera_enabled=True,
|
||||
camera_width=1024,
|
||||
camera_height=1024,
|
||||
)
|
||||
|
||||
llm = AzureLLMService(
|
||||
api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
|
||||
endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
|
||||
model=os.getenv("AZURE_CHATGPT_MODEL"),
|
||||
)
|
||||
tts1 = AzureTTSService(
|
||||
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
|
||||
region=os.getenv("AZURE_SPEECH_REGION"),
|
||||
)
|
||||
tts2 = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id="jBpfuIE2acCO8z3wKNLl",
|
||||
)
|
||||
dalle = FalImageGenService(
|
||||
params=FalImageGenService.InputParams(
|
||||
image_size="1024x1024"
|
||||
),
|
||||
aiohttp_session=session,
|
||||
key=os.getenv("FAL_KEY"),
|
||||
)
|
||||
|
||||
bot1_messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a stern librarian. You strongly believe that a hot dog is a sandwich. Start by stating this fact in a few sentences, then be prepared to debate this with the user. You shouldn't ever compromise on the fundamental truth that a hot dog is a sandwich. Your responses should only be a few sentences long.",
|
||||
},
|
||||
]
|
||||
bot2_messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a silly cat, and you strongly believe that a hot dog is not a sandwich. Debate this with the user, only responding with a few sentences. Don't ever accept that a hot dog is a sandwich.",
|
||||
},
|
||||
]
|
||||
|
||||
async def get_text_and_audio(messages) -> Tuple[str, bytearray]:
|
||||
"""This function streams text from the LLM and uses the TTS service to convert
|
||||
that text to speech as it's received. """
|
||||
source_queue = asyncio.Queue()
|
||||
sink_queue = asyncio.Queue()
|
||||
sentence_aggregator = SentenceAggregator()
|
||||
pipeline = Pipeline(
|
||||
[llm, sentence_aggregator, tts1], source_queue, sink_queue
|
||||
)
|
||||
|
||||
await source_queue.put(LLMMessagesFrame(messages))
|
||||
await source_queue.put(EndFrame())
|
||||
await pipeline.run_pipeline()
|
||||
|
||||
message = ""
|
||||
all_audio = bytearray()
|
||||
while sink_queue.qsize():
|
||||
frame = sink_queue.get_nowait()
|
||||
if isinstance(frame, TextFrame):
|
||||
message += frame.text
|
||||
elif isinstance(frame, AudioFrame):
|
||||
all_audio.extend(frame.audio)
|
||||
|
||||
return (message, all_audio)
|
||||
|
||||
async def get_bot1_statement():
|
||||
message, audio = await get_text_and_audio(bot1_messages)
|
||||
|
||||
bot1_messages.append({"role": "assistant", "content": message})
|
||||
bot2_messages.append({"role": "user", "content": message})
|
||||
|
||||
return audio
|
||||
|
||||
async def get_bot2_statement():
|
||||
message, audio = await get_text_and_audio(bot2_messages)
|
||||
|
||||
bot2_messages.append({"role": "assistant", "content": message})
|
||||
bot1_messages.append({"role": "user", "content": message})
|
||||
|
||||
return audio
|
||||
|
||||
async def argue():
|
||||
for i in range(100):
|
||||
print(f"In iteration {i}")
|
||||
|
||||
bot1_description = "A woman conservatively dressed as a librarian in a library surrounded by books, cartoon, serious, highly detailed"
|
||||
|
||||
(audio1, image_data1) = await asyncio.gather(
|
||||
get_bot1_statement(), dalle.run_image_gen(bot1_description)
|
||||
)
|
||||
await transport.send_queue.put(
|
||||
[
|
||||
ImageFrame(image_data1[1], image_data1[2]),
|
||||
AudioFrame(audio1),
|
||||
]
|
||||
)
|
||||
|
||||
bot2_description = "A cat dressed in a hot dog costume, cartoon, bright colors, funny, highly detailed"
|
||||
|
||||
(audio2, image_data2) = await asyncio.gather(
|
||||
get_bot2_statement(), dalle.run_image_gen(bot2_description)
|
||||
)
|
||||
await transport.send_queue.put(
|
||||
[
|
||||
ImageFrame(image_data2[1], image_data2[2]),
|
||||
AudioFrame(audio2),
|
||||
]
|
||||
)
|
||||
|
||||
await asyncio.gather(transport.run(), argue())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
62
examples/foundational/09-mirror.py
Normal file
@@ -0,0 +1,62 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import AudioRawFrame, ImageRawFrame
|
||||
from pipecat.processors.filter import Filter
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
||||
from pipecat.transports.services.daily import DailyTransport, DailyParams
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url, token):
|
||||
transport = DailyTransport(
|
||||
room_url, token, "Test",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720
|
||||
)
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_video(participant["id"])
|
||||
|
||||
# The ParallelPipeline is not really necessary here but it shows how you
|
||||
# would process audio and video concurrently in parallel pipelines.
|
||||
pipeline = Pipeline([transport.input(),
|
||||
ParallelPipeline(
|
||||
[Filter([AudioRawFrame])],
|
||||
[Filter([ImageRawFrame])]),
|
||||
transport.output()])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
65
examples/foundational/09a-local-mirror.py
Normal file
@@ -0,0 +1,65 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
import tkinter as tk
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.tk import TkLocalTransport
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url, token):
|
||||
tk_root = tk.Tk()
|
||||
tk_root.title("Local Mirror")
|
||||
|
||||
daily_transport = DailyTransport(room_url, token, "Test", DailyParams(audio_in_enabled=True))
|
||||
|
||||
tk_transport = TkLocalTransport(
|
||||
tk_root,
|
||||
TransportParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720))
|
||||
|
||||
@daily_transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_video(participant["id"])
|
||||
|
||||
pipeline = Pipeline([daily_transport.input(), tk_transport.output()])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
async def run_tk():
|
||||
while runner.is_active():
|
||||
tk_root.update()
|
||||
tk_root.update_idletasks()
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
await asyncio.gather(runner.run(task), run_tk())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
181
examples/foundational/10-wake-word.py
Normal file
@@ -0,0 +1,181 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
|
||||
from PIL import Image
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
SystemFrame,
|
||||
TextFrame,
|
||||
ImageRawFrame,
|
||||
SpriteFrame,
|
||||
TranscriptionFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import (
|
||||
LLMUserContextAggregator,
|
||||
LLMAssistantContextAggregator,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
sprites = {}
|
||||
image_files = [
|
||||
"sc-default.png",
|
||||
"sc-talk.png",
|
||||
"sc-listen-1.png",
|
||||
"sc-think-1.png",
|
||||
"sc-think-2.png",
|
||||
"sc-think-3.png",
|
||||
"sc-think-4.png",
|
||||
]
|
||||
|
||||
script_dir = os.path.dirname(__file__)
|
||||
|
||||
for file in image_files:
|
||||
# Build the full path to the image file
|
||||
full_path = os.path.join(script_dir, "assets", file)
|
||||
# Get the filename without the extension to use as the dictionary key
|
||||
filename = os.path.splitext(os.path.basename(full_path))[0]
|
||||
# Open the image and convert it to bytes
|
||||
with Image.open(full_path) as img:
|
||||
sprites[file] = ImageRawFrame(image=img.tobytes(), size=img.size, format=img.format)
|
||||
|
||||
# When the bot isn't talking, show a static image of the cat listening
|
||||
quiet_frame = sprites["sc-listen-1.png"]
|
||||
|
||||
# When the bot is talking, build an animation from two sprites
|
||||
talking_list = [sprites["sc-default.png"], sprites["sc-talk.png"]]
|
||||
talking = [random.choice(talking_list) for x in range(30)]
|
||||
talking_frame = SpriteFrame(talking)
|
||||
|
||||
# TODO: Support "thinking" as soon as we get a valid transcript, while LLM
|
||||
# is processing
|
||||
thinking_list = [
|
||||
sprites["sc-think-1.png"],
|
||||
sprites["sc-think-2.png"],
|
||||
sprites["sc-think-3.png"],
|
||||
sprites["sc-think-4.png"],
|
||||
]
|
||||
thinking_frame = SpriteFrame(thinking_list)
|
||||
|
||||
|
||||
class NameCheckFilter(FrameProcessor):
|
||||
def __init__(self, names: list[str]):
|
||||
super().__init__()
|
||||
self._names = names
|
||||
self._sentence = ""
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
if isinstance(frame, SystemFrame):
|
||||
await self.push_frame(frame, direction)
|
||||
return
|
||||
|
||||
content: str = ""
|
||||
|
||||
# TODO: split up transcription by participant
|
||||
if isinstance(frame, TranscriptionFrame):
|
||||
content = frame.text
|
||||
self._sentence += content
|
||||
if self._sentence.endswith((".", "?", "!")):
|
||||
if any(name in self._sentence for name in self._names):
|
||||
await self.push_frame(TextFrame(self._sentence))
|
||||
self._sentence = ""
|
||||
else:
|
||||
self._sentence = ""
|
||||
else:
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
class ImageSyncAggregator(FrameProcessor):
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await self.push_frame(talking_frame)
|
||||
await self.push_frame(frame)
|
||||
await self.push_frame(quiet_frame)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Santa Cat",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=720,
|
||||
camera_out_height=1280,
|
||||
camera_out_framerate=10,
|
||||
transcription_enabled=True
|
||||
)
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4-turbo-preview")
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id="jBpfuIE2acCO8z3wKNLl",
|
||||
)
|
||||
isa = ImageSyncAggregator()
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are Santa Cat, a cat that lives in Santa's workshop at the North Pole. You should be clever, and a bit sarcastic. You should also tell jokes every once in a while. Your responses should only be a few sentences long.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserContextAggregator(messages)
|
||||
tma_out = LLMAssistantContextAggregator(messages)
|
||||
ncf = NameCheckFilter(["Santa Cat", "Santa"])
|
||||
|
||||
pipeline = Pipeline([transport.input(), isa, ncf, tma_in,
|
||||
llm, tma_out, tts, transport.output()])
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
# Send some greeting at the beginning.
|
||||
await tts.say("Hi! If you want to talk to me, just say 'hey Santa Cat'.")
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
|
||||
async def starting_image():
|
||||
await transport.send_image(quiet_frame)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
await asyncio.gather(runner.run(task), starting_image())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
132
examples/foundational/11-sound-effects.py
Normal file
@@ -0,0 +1,132 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import wave
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
AudioRawFrame,
|
||||
LLMResponseEndFrame,
|
||||
LLMMessagesFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import (
|
||||
LLMUserContextAggregator,
|
||||
LLMAssistantContextAggregator,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.logger import FrameLogger
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
sounds = {}
|
||||
sound_files = ["ding1.wav", "ding2.wav"]
|
||||
|
||||
script_dir = os.path.dirname(__file__)
|
||||
|
||||
for file in sound_files:
|
||||
# Build the full path to the image file
|
||||
full_path = os.path.join(script_dir, "assets", file)
|
||||
# Get the filename without the extension to use as the dictionary key
|
||||
filename = os.path.splitext(os.path.basename(full_path))[0]
|
||||
# Open the image and convert it to bytes
|
||||
with wave.open(full_path) as audio_file:
|
||||
sounds[file] = AudioRawFrame(audio_file.readframes(-1),
|
||||
audio_file.getframerate(), audio_file.getnchannels())
|
||||
|
||||
|
||||
class OutboundSoundEffectWrapper(FrameProcessor):
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
if isinstance(frame, LLMResponseEndFrame):
|
||||
await self.push_frame(sounds["ding1.wav"])
|
||||
# In case anything else downstream needs it
|
||||
await self.push_frame(frame, direction)
|
||||
else:
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
class InboundSoundEffectWrapper(FrameProcessor):
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
if isinstance(frame, LLMMessagesFrame):
|
||||
await self.push_frame(sounds["ding2.wav"])
|
||||
# In case anything else downstream needs it
|
||||
await self.push_frame(frame, direction)
|
||||
else:
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(audio_out_enabled=True, transcription_enabled=True)
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4-turbo-preview")
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id="ErXwobaYiN019PkySvjV",
|
||||
)
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserContextAggregator(messages)
|
||||
tma_out = LLMAssistantContextAggregator(messages)
|
||||
out_sound = OutboundSoundEffectWrapper()
|
||||
in_sound = InboundSoundEffectWrapper()
|
||||
fl = FrameLogger("LLM Out")
|
||||
fl2 = FrameLogger("Transcription In")
|
||||
|
||||
pipeline = Pipeline([transport.input(), tma_in, in_sound, fl2, llm,
|
||||
tma_out, fl, tts, out_sound, transport.output()])
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await tts.say("Hi, I'm listening!")
|
||||
await transport.send_audio(sounds["ding1.wav"])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
104
examples/foundational/12-describe-video.py
Normal file
@@ -0,0 +1,104 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.moondream import MoondreamService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVAD
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class UserImageRequester(FrameProcessor):
|
||||
|
||||
def __init__(self, participant_id: str | None = None):
|
||||
super().__init__()
|
||||
self._participant_id = participant_id
|
||||
|
||||
def set_participant_id(self, participant_id: str):
|
||||
self._participant_id = participant_id
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
if self._participant_id and isinstance(frame, TextFrame):
|
||||
await self.push_frame(UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM)
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Describe participant video",
|
||||
DailyParams(
|
||||
audio_in_enabled=True, # This is so Silero VAD can get audio data
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True
|
||||
)
|
||||
)
|
||||
|
||||
vad = SileroVAD()
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
)
|
||||
|
||||
user_response = UserResponseAggregator()
|
||||
|
||||
image_requester = UserImageRequester()
|
||||
|
||||
vision_aggregator = VisionImageFrameAggregator()
|
||||
|
||||
# If you run into weird description, try with use_cpu=True
|
||||
moondream = MoondreamService()
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await tts.say("Hi there! Feel free to ask me what I see.")
|
||||
transport.capture_participant_video(participant["id"], framerate=0)
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
image_requester.set_participant_id(participant["id"])
|
||||
|
||||
pipeline = Pipeline([transport.input(), vad, user_response, image_requester,
|
||||
vision_aggregator, moondream, tts, transport.output()])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
55
examples/foundational/13-whisper-transcription.py
Normal file
@@ -0,0 +1,55 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import Frame, TranscriptionFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.whisper import WhisperSTTService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class TranscriptionLogger(FrameProcessor):
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
if isinstance(frame, TranscriptionFrame):
|
||||
print(f"Transcription: {frame.text}")
|
||||
|
||||
|
||||
async def main(room_url: str):
|
||||
transport = DailyTransport(room_url, None, "Transcription bot",
|
||||
DailyParams(audio_in_enabled=True))
|
||||
|
||||
stt = WhisperSTTService()
|
||||
|
||||
tl = TranscriptionLogger()
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
55
examples/foundational/13a-whisper-local.py
Normal file
@@ -0,0 +1,55 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import Frame, TranscriptionFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.whisper import WhisperSTTService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.audio import LocalAudioTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class TranscriptionLogger(FrameProcessor):
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
if isinstance(frame, TranscriptionFrame):
|
||||
print(f"Transcription: {frame.text}")
|
||||
|
||||
|
||||
async def main(room_url: str):
|
||||
transport = LocalAudioTransport(TransportParams(audio_in_enabled=True))
|
||||
|
||||
stt = WhisperSTTService()
|
||||
|
||||
tl = TranscriptionLogger()
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
BIN
examples/foundational/assets/ding1.wav
Normal file
BIN
examples/foundational/assets/ding2.wav
Normal file
|
Before Width: | Height: | Size: 871 KiB After Width: | Height: | Size: 871 KiB |
|
Before Width: | Height: | Size: 868 KiB After Width: | Height: | Size: 868 KiB |
BIN
examples/foundational/assets/sc-listen-2.png
Normal file
|
After Width: | Height: | Size: 868 KiB |
|
Before Width: | Height: | Size: 870 KiB After Width: | Height: | Size: 870 KiB |
|
Before Width: | Height: | Size: 871 KiB After Width: | Height: | Size: 871 KiB |
BIN
examples/foundational/assets/sc-think-2.png
Normal file
|
After Width: | Height: | Size: 871 KiB |
BIN
examples/foundational/assets/sc-think-3.png
Normal file
|
After Width: | Height: | Size: 872 KiB |
BIN
examples/foundational/assets/sc-think-4.png
Normal file
|
After Width: | Height: | Size: 868 KiB |
BIN
examples/foundational/assets/speaking.png
Normal file
|
After Width: | Height: | Size: 33 KiB |
BIN
examples/foundational/assets/waiting.png
Normal file
|
After Width: | Height: | Size: 30 KiB |
58
examples/foundational/runner.py
Normal file
@@ -0,0 +1,58 @@
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
|
||||
def configure():
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
parser.add_argument(
|
||||
"-u",
|
||||
"--url",
|
||||
type=str,
|
||||
required=False,
|
||||
help="URL of the Daily room to join")
|
||||
parser.add_argument(
|
||||
"-k",
|
||||
"--apikey",
|
||||
type=str,
|
||||
required=False,
|
||||
help="Daily API Key (needed to create an owner token for the room)",
|
||||
)
|
||||
|
||||
args, unknown = parser.parse_known_args()
|
||||
|
||||
url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL")
|
||||
key = args.apikey or os.getenv("DAILY_API_KEY")
|
||||
|
||||
if not url:
|
||||
raise Exception(
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL.")
|
||||
|
||||
if not key:
|
||||
raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
|
||||
# Create a meeting token for the given room with an expiration 1 hour in
|
||||
# the future.
|
||||
room_name: str = urllib.parse.urlparse(url).path[1:]
|
||||
expiration: float = time.time() + 60 * 60
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://api.daily.co/v1/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True,
|
||||
"exp": expiration}},
|
||||
)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
|
||||
return (url, token)
|
||||
25
examples/foundational/websocket-server/frames.proto
Normal file
@@ -0,0 +1,25 @@
|
||||
syntax = "proto3";
|
||||
|
||||
package pipecat_proto;
|
||||
|
||||
message TextFrame {
|
||||
string text = 1;
|
||||
}
|
||||
|
||||
message AudioFrame {
|
||||
bytes audio = 1;
|
||||
}
|
||||
|
||||
message TranscriptionFrame {
|
||||
string text = 1;
|
||||
string participant_id = 2;
|
||||
string timestamp = 3;
|
||||
}
|
||||
|
||||
message Frame {
|
||||
oneof frame {
|
||||
TextFrame text = 1;
|
||||
AudioFrame audio = 2;
|
||||
TranscriptionFrame transcription = 3;
|
||||
}
|
||||
}
|
||||
134
examples/foundational/websocket-server/index.html
Normal file
@@ -0,0 +1,134 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<script src="//cdn.jsdelivr.net/npm/protobufjs@7.X.X/dist/protobuf.min.js"></script>
|
||||
<title>WebSocket Audio Stream</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1>WebSocket Audio Stream</h1>
|
||||
<button id="startAudioBtn">Start Audio</button>
|
||||
<button id="stopAudioBtn">Stop Audio</button>
|
||||
<script>
|
||||
const SAMPLE_RATE = 16000;
|
||||
const BUFFER_SIZE = 8192;
|
||||
const MIN_AUDIO_SIZE = 6400;
|
||||
|
||||
let audioContext;
|
||||
let microphoneStream;
|
||||
let scriptProcessor;
|
||||
let source;
|
||||
let frame;
|
||||
let audioChunks = [];
|
||||
let isPlaying = false;
|
||||
let ws;
|
||||
|
||||
const proto = protobuf.load("frames.proto", (err, root) => {
|
||||
if (err) throw err;
|
||||
frame = root.lookupType("pipecat_proto.Frame");
|
||||
});
|
||||
|
||||
function initWebSocket() {
|
||||
ws = new WebSocket('ws://localhost:8765');
|
||||
|
||||
ws.addEventListener('open', () => console.log('WebSocket connection established.'));
|
||||
ws.addEventListener('message', handleWebSocketMessage);
|
||||
ws.addEventListener('close', (event) => console.log("WebSocket connection closed.", event.code, event.reason));
|
||||
ws.addEventListener('error', (event) => console.error('WebSocket error:', event));
|
||||
}
|
||||
|
||||
async function handleWebSocketMessage(event) {
|
||||
const arrayBuffer = await event.data.arrayBuffer();
|
||||
enqueueAudioFromProto(arrayBuffer);
|
||||
}
|
||||
|
||||
function enqueueAudioFromProto(arrayBuffer) {
|
||||
const parsedFrame = frame.decode(new Uint8Array(arrayBuffer));
|
||||
if (!parsedFrame?.audio) return false;
|
||||
|
||||
const frameCount = parsedFrame.audio.data.length / 2;
|
||||
const audioOutBuffer = audioContext.createBuffer(1, frameCount, SAMPLE_RATE);
|
||||
const nowBuffering = audioOutBuffer.getChannelData(0);
|
||||
const view = new Int16Array(parsedFrame.audio.data.buffer);
|
||||
|
||||
for (let i = 0; i < frameCount; i++) {
|
||||
const word = view[i];
|
||||
nowBuffering[i] = ((word + 32768) % 65536 - 32768) / 32768.0;
|
||||
}
|
||||
|
||||
audioChunks.push(audioOutBuffer);
|
||||
if (!isPlaying) playNextChunk();
|
||||
}
|
||||
|
||||
function playNextChunk() {
|
||||
if (audioChunks.length === 0) {
|
||||
isPlaying = false;
|
||||
return;
|
||||
}
|
||||
|
||||
isPlaying = true;
|
||||
const audioOutBuffer = audioChunks.shift();
|
||||
const source = audioContext.createBufferSource();
|
||||
source.buffer = audioOutBuffer;
|
||||
source.connect(audioContext.destination);
|
||||
source.onended = playNextChunk;
|
||||
source.start();
|
||||
}
|
||||
|
||||
function startAudio() {
|
||||
if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
|
||||
alert('getUserMedia is not supported in your browser.');
|
||||
return;
|
||||
}
|
||||
|
||||
navigator.mediaDevices.getUserMedia({ audio: true })
|
||||
.then((stream) => {
|
||||
microphoneStream = stream;
|
||||
audioContext = new (window.AudioContext || window.webkitAudioContext)();
|
||||
scriptProcessor = audioContext.createScriptProcessor(BUFFER_SIZE, 1, 1);
|
||||
source = audioContext.createMediaStreamSource(stream);
|
||||
source.connect(scriptProcessor);
|
||||
scriptProcessor.connect(audioContext.destination);
|
||||
|
||||
const audioBuffer = [];
|
||||
const skipRatio = Math.floor(audioContext.sampleRate / (SAMPLE_RATE * 2));
|
||||
|
||||
scriptProcessor.onaudioprocess = (event) => {
|
||||
const rawLeftChannelData = event.inputBuffer.getChannelData(0);
|
||||
for (let i = 0; i < rawLeftChannelData.length; i += skipRatio) {
|
||||
const normalized = ((rawLeftChannelData[i] * 32768.0) + 32768) % 65536 - 32768;
|
||||
const swappedBytes = ((normalized & 0xff) << 8) | ((normalized >> 8) & 0xff);
|
||||
audioBuffer.push(swappedBytes);
|
||||
}
|
||||
|
||||
if (audioBuffer.length >= MIN_AUDIO_SIZE) {
|
||||
const audioFrame = frame.create({ audio: { audio: audioBuffer.slice(0, MIN_AUDIO_SIZE) } });
|
||||
const encodedFrame = new Uint8Array(frame.encode(audioFrame).finish());
|
||||
ws.send(encodedFrame);
|
||||
audioBuffer.splice(0, MIN_AUDIO_SIZE);
|
||||
}
|
||||
};
|
||||
|
||||
initWebSocket();
|
||||
})
|
||||
.catch((error) => console.error('Error accessing microphone:', error));
|
||||
}
|
||||
|
||||
function stopAudio() {
|
||||
if (ws) {
|
||||
ws.close();
|
||||
scriptProcessor.disconnect();
|
||||
source.disconnect();
|
||||
ws = undefined;
|
||||
}
|
||||
}
|
||||
|
||||
document.getElementById('startAudioBtn').addEventListener('click', startAudio);
|
||||
document.getElementById('stopAudioBtn').addEventListener('click', stopAudio);
|
||||
</script>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
50
examples/foundational/websocket-server/sample.py
Normal file
@@ -0,0 +1,50 @@
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import logging
|
||||
import os
|
||||
from pipecat.pipeline.frame_processor import FrameProcessor
|
||||
from pipecat.pipeline.frames import TextFrame, TranscriptionFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.services.elevenlabs_ai_services import ElevenLabsTTSService
|
||||
from pipecat.transports.websocket_transport import WebsocketTransport
|
||||
from pipecat.services.whisper_ai_services import WhisperSTTService
|
||||
|
||||
logging.basicConfig(format="%(levelno)s %(asctime)s %(message)s")
|
||||
logger = logging.getLogger("pipecat")
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
|
||||
class WhisperTranscriber(FrameProcessor):
|
||||
async def process_frame(self, frame):
|
||||
if isinstance(frame, TranscriptionFrame):
|
||||
print(f"Transcribed: {frame.text}")
|
||||
else:
|
||||
yield frame
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = WebsocketTransport(
|
||||
mic_enabled=True,
|
||||
speaker_enabled=True,
|
||||
)
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
)
|
||||
|
||||
pipeline = Pipeline([
|
||||
WhisperSTTService(),
|
||||
WhisperTranscriber(),
|
||||
tts,
|
||||
])
|
||||
|
||||
@transport.on_connection
|
||||
async def queue_frame():
|
||||
await pipeline.queue_frames([TextFrame("Hello there!")])
|
||||
|
||||
await transport.run(pipeline)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -5,61 +5,63 @@ import time
|
||||
import urllib.parse
|
||||
import random
|
||||
|
||||
from dailyai.services.daily_transport_service import DailyTransportService
|
||||
from dailyai.services.azure_ai_services import AzureLLMService, AzureTTSService
|
||||
from dailyai.queue_frame import QueueFrame, FrameType
|
||||
from dailyai.services.fal_ai_services import FalImageGenService
|
||||
from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
|
||||
from pipecat.transports.daily_transport import DailyTransport
|
||||
from pipecat.services.azure_ai_services import AzureLLMService, AzureTTSService
|
||||
from pipecat.pipeline.frames import Frame
|
||||
from pipecat.services.fal_ai_services import FalImageGenService
|
||||
|
||||
async def main(room_url:str, token):
|
||||
|
||||
async def main(room_url: str, token):
|
||||
global transport
|
||||
global llm
|
||||
global tts
|
||||
|
||||
transport = DailyTransportService(
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Imagebot",
|
||||
1,
|
||||
)
|
||||
transport.mic_enabled = True
|
||||
transport.camera_enabled = True
|
||||
transport.mic_sample_rate = 16000
|
||||
transport.camera_width = 1024
|
||||
transport.camera_height = 1024
|
||||
transport._mic_enabled = True
|
||||
transport._camera_enabled = True
|
||||
transport._mic_sample_rate = 16000
|
||||
transport._camera_width = 1024
|
||||
transport._camera_height = 1024
|
||||
|
||||
llm = AzureLLMService()
|
||||
tts = AzureTTSService()
|
||||
img = FalImageGenService()
|
||||
|
||||
|
||||
async def handle_transcriptions():
|
||||
print("handle_transcriptions got called")
|
||||
|
||||
sentence = ""
|
||||
async for message in transport.get_transcriptions():
|
||||
print(f"transcription message: {message}")
|
||||
if message["session_id"] == transport.my_participant_id:
|
||||
if message["session_id"] == transport._my_participant_id:
|
||||
continue
|
||||
finder = message["text"].find("start over")
|
||||
finder = message["text"].find("start over")
|
||||
print(f"finder: {finder}")
|
||||
if finder >= 0:
|
||||
async for audio in tts.run_tts(f"Resetting."):
|
||||
transport.output_queue.put(QueueFrame(FrameType.AUDIO_FRAME, audio))
|
||||
transport.output_queue.put(
|
||||
Frame(FrameType.AUDIO_FRAME, audio))
|
||||
sentence = ""
|
||||
continue
|
||||
# todo: we could differentiate between transcriptions from different participants
|
||||
# todo: we could differentiate between transcriptions from
|
||||
# different participants
|
||||
sentence += f" {message['text']}"
|
||||
print(f"sentence is now: {sentence}")
|
||||
# TODO: Cache this audio
|
||||
phrase = random.choice(["OK.", "Got it.", "Sure.", "You bet.", "Sure thing."])
|
||||
phrase = random.choice(
|
||||
["OK.", "Got it.", "Sure.", "You bet.", "Sure thing."])
|
||||
async for audio in tts.run_tts(phrase):
|
||||
transport.output_queue.put(QueueFrame(FrameType.AUDIO_FRAME, audio))
|
||||
transport.output_queue.put(Frame(FrameType.AUDIO_FRAME, audio))
|
||||
img_result = img.run_image_gen(sentence, "1024x1024")
|
||||
awaited_img = await asyncio.gather(img_result)
|
||||
transport.output_queue.put(
|
||||
[
|
||||
QueueFrame(FrameType.IMAGE_FRAME, awaited_img[0][1]),
|
||||
Frame(FrameType.IMAGE_FRAME, awaited_img[0][1]),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -69,20 +71,22 @@ async def main(room_url:str, token):
|
||||
if participant["info"]["isLocal"]:
|
||||
return
|
||||
async for audio in tts.run_tts("Describe an image, and I'll create it."):
|
||||
audio_generator = tts.run_tts(f"Hello, {participant['info']['userName']}! Describe an image and I'll create it. To start over, just say 'start over'.")
|
||||
audio_generator = tts.run_tts(
|
||||
f"Hello, {participant['info']['userName']}! Describe an image and I'll create it. To start over, just say 'start over'.")
|
||||
async for audio in audio_generator:
|
||||
transport.output_queue.put(QueueFrame(FrameType.AUDIO_FRAME, audio))
|
||||
transport.output_queue.put(Frame(FrameType.AUDIO_FRAME, audio))
|
||||
|
||||
transport.transcription_settings["extra"]["punctuate"] = False
|
||||
transport.transcription_settings["extra"]["endpointing"] = False
|
||||
await asyncio.gather(transport.run(), handle_transcriptions())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Simple Daily Bot Sample")
|
||||
parser.add_argument(
|
||||
"-u", "--url", type=str, required=True, help="URL of the Daily room to join"
|
||||
)
|
||||
"-u",
|
||||
"--url",
|
||||
type=str,
|
||||
required=True,
|
||||
help="URL of the Daily room to join")
|
||||
parser.add_argument(
|
||||
"-k",
|
||||
"--apikey",
|
||||
@@ -93,20 +97,25 @@ if __name__ == "__main__":
|
||||
|
||||
args, unknown = parser.parse_known_args()
|
||||
|
||||
# Create a meeting token for the given room with an expiration 1 hour in the future.
|
||||
# Create a meeting token for the given room with an expiration 1 hour in
|
||||
# the future.
|
||||
room_name: str = urllib.parse.urlparse(args.url).path[1:]
|
||||
expiration: float = time.time() + 60 * 60
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://api.daily.co/v1/meeting-tokens",
|
||||
headers={"Authorization": f"Bearer {args.apikey}"},
|
||||
headers={
|
||||
"Authorization": f"Bearer {args.apikey}"},
|
||||
json={
|
||||
"properties": {"room_name": room_name, "is_owner": True, "exp": expiration}
|
||||
},
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True,
|
||||
"exp": expiration}},
|
||||
)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
|
||||
135
examples/internal/11a-dial-out.py
Normal file
@@ -0,0 +1,135 @@
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import wave
|
||||
|
||||
from pipecat.transports.daily_transport import DailyTransport
|
||||
from pipecat.services.azure_ai_services import AzureLLMService, AzureTTSService
|
||||
from pipecat.pipeline.aggregators import LLMContextAggregator
|
||||
from pipecat.services.ai_services import AIService, FrameLogger
|
||||
from pipecat.pipeline.frames import Frame, AudioFrame, LLMResponseEndFrame, LLMMessagesFrame
|
||||
from typing import AsyncGenerator
|
||||
|
||||
from runner import configure
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
sounds = {}
|
||||
sound_files = [
|
||||
'ding1.wav',
|
||||
'ding2.wav'
|
||||
]
|
||||
|
||||
script_dir = os.path.dirname(__file__)
|
||||
|
||||
for file in sound_files:
|
||||
# Build the full path to the image file
|
||||
full_path = os.path.join(script_dir, "assets", file)
|
||||
# Get the filename without the extension to use as the dictionary key
|
||||
filename = os.path.splitext(os.path.basename(full_path))[0]
|
||||
# Open the image and convert it to bytes
|
||||
with wave.open(full_path) as audio_file:
|
||||
sounds[file] = audio_file.readframes(-1)
|
||||
|
||||
|
||||
class OutboundSoundEffectWrapper(AIService):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
|
||||
if isinstance(frame, LLMResponseEndFrame):
|
||||
yield AudioFrame(sounds["ding1.wav"])
|
||||
# In case anything else up the stack needs it
|
||||
yield frame
|
||||
else:
|
||||
yield frame
|
||||
|
||||
|
||||
class InboundSoundEffectWrapper(AIService):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
|
||||
if isinstance(frame, LLMMessagesFrame):
|
||||
yield AudioFrame(sounds["ding2.wav"])
|
||||
# In case anything else up the stack needs it
|
||||
yield frame
|
||||
else:
|
||||
yield frame
|
||||
|
||||
|
||||
async def main(room_url: str, token, phone):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
|
||||
global transport
|
||||
global llm
|
||||
global tts
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
300,
|
||||
)
|
||||
transport._mic_enabled = True
|
||||
transport._mic_sample_rate = 16000
|
||||
transport._camera_enabled = False
|
||||
|
||||
llm = AzureLLMService()
|
||||
tts = AzureTTSService()
|
||||
|
||||
@transport.event_handler("on_first_other_participant_joined")
|
||||
async def on_first_other_participant_joined(transport, participant):
|
||||
await tts.say("Hi, I'm listening!", transport.send_queue)
|
||||
await transport.send_queue.put(AudioFrame(sounds["ding1.wav"]))
|
||||
|
||||
async def handle_transcriptions():
|
||||
messages = [
|
||||
{"role": "system", "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio. Respond to what the user said in a creative and helpful way."},
|
||||
]
|
||||
|
||||
tma_in = LLMContextAggregator(
|
||||
messages, "user", transport._my_participant_id
|
||||
)
|
||||
tma_out = LLMContextAggregator(
|
||||
messages, "assistant", transport._my_participant_id
|
||||
)
|
||||
out_sound = OutboundSoundEffectWrapper()
|
||||
in_sound = InboundSoundEffectWrapper()
|
||||
fl = FrameLogger("LLM Out")
|
||||
fl2 = FrameLogger("Transcription In")
|
||||
await out_sound.run_to_queue(
|
||||
transport.send_queue,
|
||||
tts.run(
|
||||
tma_out.run(
|
||||
llm.run(
|
||||
fl2.run(
|
||||
in_sound.run(
|
||||
tma_in.run(
|
||||
transport.get_receive_frames()
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
@transport.event_handler("on_participant_joined")
|
||||
async def pax_joined(transport, pax):
|
||||
print(f"PARTICIPANT JOINED: {pax}")
|
||||
|
||||
@transport.event_handler("on_call_state_updated")
|
||||
async def on_call_state_updated(transport, state):
|
||||
if (state == "joined"):
|
||||
if (phone):
|
||||
transport.start_recording()
|
||||
transport.dialout(phone)
|
||||
|
||||
await asyncio.gather(transport.run(), handle_transcriptions())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
34
examples/server/README.md
Normal file
@@ -0,0 +1,34 @@
|
||||
# Server Example
|
||||
|
||||
Use this server app to quickly host a bot on the web:
|
||||
|
||||
```
|
||||
flask --app daily-bot-manager.py --debug run
|
||||
```
|
||||
|
||||
It's currently configured to serve example apps defined in the APPS constant in the server file:
|
||||
|
||||
```
|
||||
chatbot
|
||||
patient-intake
|
||||
storybot
|
||||
translator
|
||||
```
|
||||
|
||||
Once the server is started, you can create a bot instance by opening `http://127.0.0.1:5000/start/chatbot` in a browser, and the server will do the following:
|
||||
|
||||
- Create a new, randomly-named Daily room with `DAILY_API_KEY` from your .env file or environment
|
||||
- Start an instance of `chatbot.py` and connect it to that room
|
||||
- 301 redirect your browser to the room
|
||||
|
||||
### Options
|
||||
|
||||
The server supports several options, which can be set in the body of a POST request, or as params in the URL of a GET request.
|
||||
|
||||
- `room_url` (default: none): A room URL to join. If empty, the server will create a Daily room and return the URL in the response.
|
||||
room_properties (none): A JSON object (URL encoded if included as a GET parameter) for overriding default room creation properties, as described here: https://docs.daily.co/reference/rest-api/rooms/create-room This will be ignored if a room_url is provided.
|
||||
- `token_properties` (none): A JSON object (URL encoded if included as a GET parameter) for overriding default token properties. By default, the server creates an owner token with an expiration time of one hour.
|
||||
- `duration` (7200 seconds, or two hours): Use this property to set a time limit for the bot, as well as an expiration time for the room (if the server is creating one). This will not add an expiration time to an existing room. Expiration times in `token_properties` or `room_properties` will also take precedence over this value. You can set this property to `0` to disable timeouts, but this isn't recommended.
|
||||
- `bot_args` (none): A string containing any additional command-line args to pass to the bot.
|
||||
- `wait_for_bot` (true): Whether to wait for the bot to successfully join the room before returning a response from the server. If true, the server will start the bot script, then poll the room for up to 5 seconds to confirm the bot has joined the room. If it doesn't, the server will stop the bot and return a 500 response. If set to `false`, the server will start the bot, but immediately return a 200 response. This can be useful if the server is creating rooms for you, and you need the room URL to join the user to the room.
|
||||
- `redirect` (true): Instead of returning a 200 for GET requests, the server will return a 301 redirect to the ROOM_URL. This is handy for testing by creating a bot with a GET request directly in the browser. POST requests will never return redirects. Set to `false` to get 200 responses with info in a JSON object even for GET requests.
|
||||
165
examples/server/daily-bot-manager.py
Normal file
@@ -0,0 +1,165 @@
|
||||
import os
|
||||
import requests
|
||||
import urllib
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
from flask import Flask, jsonify, redirect, request
|
||||
from flask_cors import CORS
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
app = Flask(__name__)
|
||||
CORS(app)
|
||||
|
||||
APPS = {
|
||||
"chatbot": "../starter-apps/chatbot.py",
|
||||
"patient-intake": "../starter-apps/patient-intake.py",
|
||||
"storybot": "../starter-apps/storybot.py",
|
||||
"translator": "../starter-apps/translator.py"
|
||||
}
|
||||
|
||||
daily_api_key = os.getenv("DAILY_API_KEY")
|
||||
api_path = os.getenv("DAILY_API_PATH") or "https://api.daily.co/v1"
|
||||
|
||||
|
||||
def get_room_name(room_url):
|
||||
return urllib.parse.urlparse(room_url).path[1:]
|
||||
|
||||
|
||||
def create_room(room_properties, exp):
|
||||
room_props = {
|
||||
"exp": exp,
|
||||
"enable_chat": True,
|
||||
"enable_emoji_reactions": True,
|
||||
"eject_at_room_exp": True,
|
||||
"enable_prejoin_ui": False,
|
||||
"enable_recording": "cloud"
|
||||
}
|
||||
if room_properties:
|
||||
room_props |= room_properties
|
||||
|
||||
res = requests.post(
|
||||
f"{api_path}/rooms",
|
||||
headers={"Authorization": f"Bearer {daily_api_key}"},
|
||||
json={
|
||||
"properties": room_props
|
||||
},
|
||||
)
|
||||
if res.status_code != 200:
|
||||
raise Exception(f"Unable to create room: {res.text}")
|
||||
|
||||
room_url = res.json()["url"]
|
||||
room_name = res.json()["name"]
|
||||
return (room_url, room_name)
|
||||
|
||||
|
||||
def create_token(room_name, token_properties, exp):
|
||||
token_props = {"exp": exp, "is_owner": True}
|
||||
if token_properties:
|
||||
token_props |= token_properties
|
||||
# Force the token to be limited to the room
|
||||
token_props |= {"room_name": room_name}
|
||||
res = requests.post(
|
||||
f'{api_path}/meeting-tokens',
|
||||
headers={
|
||||
'Authorization': f'Bearer {daily_api_key}'},
|
||||
json={
|
||||
'properties': token_props})
|
||||
if res.status_code != 200:
|
||||
if res.status_code != 200:
|
||||
raise Exception(f"Unable to create meeting token: {res.text}")
|
||||
|
||||
meeting_token = res.json()['token']
|
||||
return meeting_token
|
||||
|
||||
|
||||
def start_bot(*, bot_path, room_url, token, bot_args, wait_for_bot):
|
||||
|
||||
room_name = get_room_name(room_url)
|
||||
proc = subprocess.Popen(
|
||||
[f"python {bot_path} -u {room_url} -t {token} -k {daily_api_key} {bot_args}"],
|
||||
shell=True,
|
||||
bufsize=1,
|
||||
)
|
||||
|
||||
if wait_for_bot:
|
||||
# Don't return until the bot has joined the room, but wait for at most 5
|
||||
# seconds.
|
||||
attempts = 0
|
||||
while attempts < 50:
|
||||
time.sleep(0.1)
|
||||
attempts += 1
|
||||
res = requests.get(
|
||||
f"{api_path}/rooms/{room_name}/get-session-data",
|
||||
headers={"Authorization": f"Bearer {daily_api_key}"},
|
||||
)
|
||||
if res.status_code == 200:
|
||||
print(f"Took {attempts} attempts to join room {room_name}")
|
||||
return True
|
||||
|
||||
# If we don't break from the loop, that means we never found the bot in the room
|
||||
raise Exception("The bot was unable to join the room. Please try again.")
|
||||
|
||||
return True
|
||||
|
||||
|
||||
@app.route("/start/<string:botname>", methods=["GET", "POST"])
|
||||
def start(botname):
|
||||
try:
|
||||
if botname not in APPS:
|
||||
raise Exception(f"Bot '{botname}' is not in the allowlist.")
|
||||
|
||||
bot_path = APPS[botname]
|
||||
props = {
|
||||
"room_url": None,
|
||||
"room_properties": None,
|
||||
"token_properties": None,
|
||||
"bot_args": None,
|
||||
"wait_for_bot": True,
|
||||
"duration": None,
|
||||
"redirect": True
|
||||
}
|
||||
props |= request.values.to_dict() # gets URL params as well as plaintext POST body
|
||||
try:
|
||||
props |= request.json
|
||||
except BaseException:
|
||||
pass
|
||||
if props['redirect'] == "false":
|
||||
props['redirect'] = False
|
||||
if props['wait_for_bot'] == "false":
|
||||
props['wait_for_bot'] = False
|
||||
|
||||
duration = int(os.getenv("DAILY_BOT_DURATION") or 7200)
|
||||
if props['duration']:
|
||||
duration = props['duration']
|
||||
exp = time.time() + duration
|
||||
if (props['room_url']):
|
||||
room_url = props['room_url']
|
||||
try:
|
||||
room_name = get_room_name(room_url)
|
||||
except ValueError:
|
||||
raise Exception(
|
||||
"There was a problem detecting the room name. Please double-check the value of room_url.")
|
||||
else:
|
||||
room_url, room_name = create_room(props['room_properties'], exp)
|
||||
token = create_token(room_name, props['token_properties'], exp)
|
||||
bot = start_bot(
|
||||
room_url=room_url,
|
||||
bot_path=bot_path,
|
||||
token=token,
|
||||
bot_args=props['bot_args'],
|
||||
wait_for_bot=props['wait_for_bot'])
|
||||
|
||||
if props['redirect'] and request.method == "GET":
|
||||
return redirect(room_url, 302)
|
||||
else:
|
||||
return jsonify({"room_url": room_url, "token": token})
|
||||
except BaseException as e:
|
||||
return f"There was a problem starting the bot: {e}", 500
|
||||
|
||||
|
||||
@app.route("/healthz")
|
||||
def health_check():
|
||||
return "ok", 200
|
||||
BIN
examples/starter-apps/assets/clack-short-quiet.wav
Normal file
BIN
examples/starter-apps/assets/clack-short.wav
Normal file
BIN
examples/starter-apps/assets/clack.wav
Normal file
BIN
examples/starter-apps/assets/ding.wav
Normal file
BIN
examples/starter-apps/assets/ding2.wav
Normal file
BIN
examples/starter-apps/assets/ding3.wav
Normal file
BIN
examples/starter-apps/assets/grandma-listening.png
Normal file
|
After Width: | Height: | Size: 1.1 MiB |
BIN
examples/starter-apps/assets/grandma-writing.png
Normal file
|
After Width: | Height: | Size: 1.1 MiB |
BIN
examples/starter-apps/assets/listening.wav
Normal file
BIN
examples/starter-apps/assets/robot01.png
Normal file
|
After Width: | Height: | Size: 759 KiB |
BIN
examples/starter-apps/assets/robot010.png
Normal file
|
After Width: | Height: | Size: 884 KiB |
BIN
examples/starter-apps/assets/robot011.png
Normal file
|
After Width: | Height: | Size: 876 KiB |
BIN
examples/starter-apps/assets/robot012.png
Normal file
|
After Width: | Height: | Size: 881 KiB |
BIN
examples/starter-apps/assets/robot013.png
Normal file
|
After Width: | Height: | Size: 866 KiB |
BIN
examples/starter-apps/assets/robot014.png
Normal file
|
After Width: | Height: | Size: 874 KiB |
BIN
examples/starter-apps/assets/robot015.png
Normal file
|
After Width: | Height: | Size: 882 KiB |
BIN
examples/starter-apps/assets/robot016.png
Normal file
|
After Width: | Height: | Size: 885 KiB |
BIN
examples/starter-apps/assets/robot017.png
Normal file
|
After Width: | Height: | Size: 888 KiB |
BIN
examples/starter-apps/assets/robot018.png
Normal file
|
After Width: | Height: | Size: 890 KiB |
BIN
examples/starter-apps/assets/robot019.png
Normal file
|
After Width: | Height: | Size: 898 KiB |
BIN
examples/starter-apps/assets/robot02.png
Normal file
|
After Width: | Height: | Size: 836 KiB |
BIN
examples/starter-apps/assets/robot020.png
Normal file
|
After Width: | Height: | Size: 903 KiB |
BIN
examples/starter-apps/assets/robot021.png
Normal file
|
After Width: | Height: | Size: 908 KiB |
BIN
examples/starter-apps/assets/robot022.png
Normal file
|
After Width: | Height: | Size: 908 KiB |
BIN
examples/starter-apps/assets/robot023.png
Normal file
|
After Width: | Height: | Size: 905 KiB |
BIN
examples/starter-apps/assets/robot024.png
Normal file
|
After Width: | Height: | Size: 903 KiB |
BIN
examples/starter-apps/assets/robot025.png
Normal file
|
After Width: | Height: | Size: 866 KiB |
BIN
examples/starter-apps/assets/robot03.png
Normal file
|
After Width: | Height: | Size: 849 KiB |
BIN
examples/starter-apps/assets/robot04.png
Normal file
|
After Width: | Height: | Size: 866 KiB |