Compare commits
359 Commits
v0.0.38
...
async-reba
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9cd7c82e77 | ||
|
|
43161c816e | ||
|
|
6644c06af1 | ||
|
|
ed47212e07 | ||
|
|
db9cb74364 | ||
|
|
f64902eb25 | ||
|
|
e115a274d6 | ||
|
|
00239c2fd4 | ||
|
|
c0f9ad19fe | ||
|
|
46ac76701e | ||
|
|
1f77863aef | ||
|
|
d7555609fd | ||
|
|
7fe118ce63 | ||
|
|
44a349386c | ||
|
|
97cba92fa5 | ||
|
|
d9b16d4f73 | ||
|
|
50b6580fbb | ||
|
|
e7548f9494 | ||
|
|
830d2df671 | ||
|
|
13b50a07db | ||
|
|
4501dca133 | ||
|
|
2c8e566507 | ||
|
|
6e8a202107 | ||
|
|
2a05cd35b0 | ||
|
|
55a70cde8f | ||
|
|
706c00d897 | ||
|
|
d323ea9e95 | ||
|
|
b8ece84c6e | ||
|
|
a018112a13 | ||
|
|
d3a477902b | ||
|
|
298b151486 | ||
|
|
6a6ea251ae | ||
|
|
c7c709a0a7 | ||
|
|
6ac57b4854 | ||
|
|
f5e0b946c7 | ||
|
|
b1818cc370 | ||
|
|
d05717a1bd | ||
|
|
d11daee31a | ||
|
|
73da8c1910 | ||
|
|
f06aa300d0 | ||
|
|
c4e94e280e | ||
|
|
8f2941c575 | ||
|
|
447baad5c3 | ||
|
|
2703813e8a | ||
|
|
521e152150 | ||
|
|
3d43ad0f4d | ||
|
|
3621fceae2 | ||
|
|
e123f33c03 | ||
|
|
b8713666c2 | ||
|
|
cf0ab85e2c | ||
|
|
8502c7c801 | ||
|
|
e89814dc6b | ||
|
|
9461bacf0d | ||
|
|
e276dcbab7 | ||
|
|
1a3de0e819 | ||
|
|
ee3786fe15 | ||
|
|
31b5667cee | ||
|
|
a483f1a083 | ||
|
|
2ecec1c9f8 | ||
|
|
08ac311971 | ||
|
|
cb49b6a0d6 | ||
|
|
016da177db | ||
|
|
ec5998bc36 | ||
|
|
b1e17ee347 | ||
|
|
b6e1d6e6ae | ||
|
|
fa609f1afc | ||
|
|
470b5eafe7 | ||
|
|
2e5b0c1d6b | ||
|
|
a9390d96a1 | ||
|
|
8ee9621d66 | ||
|
|
49f2123893 | ||
|
|
cf72129852 | ||
|
|
8edee8155d | ||
|
|
c262b272fa | ||
|
|
9ef9c1c58a | ||
|
|
c7ff79a652 | ||
|
|
da81df5284 | ||
|
|
a4420dc88b | ||
|
|
eeb8338dce | ||
|
|
dfa4ac81fd | ||
|
|
ea16dca8aa | ||
|
|
306632b29a | ||
|
|
4533ed014f | ||
|
|
68cc4186ad | ||
|
|
9a4e749c7c | ||
|
|
55c645c614 | ||
|
|
a1024bb365 | ||
|
|
dfc82c3ba4 | ||
|
|
9e27a8aad0 | ||
|
|
c73111afea | ||
|
|
26a64afd8d | ||
|
|
78a3f081de | ||
|
|
e8f8a49646 | ||
|
|
219304c5ee | ||
|
|
f3fd312b83 | ||
|
|
357e66d64d | ||
|
|
4fa1ea8c4b | ||
|
|
3b81cd462d | ||
|
|
14acf05a26 | ||
|
|
58d9c84bc9 | ||
|
|
7e39d9ad3d | ||
|
|
a4edb3dab1 | ||
|
|
ed409d0460 | ||
|
|
50b45ac2da | ||
|
|
29bcbc68c5 | ||
|
|
affbe9ac7d | ||
|
|
1790fa452f | ||
|
|
607a246572 | ||
|
|
4f1b06e6b2 | ||
|
|
62e9a33a70 | ||
|
|
3298f935ef | ||
|
|
0e8f56c752 | ||
|
|
8224538372 | ||
|
|
fbf6eef68f | ||
|
|
f078d156de | ||
|
|
23d6eed5ea | ||
|
|
0ed3d118d6 | ||
|
|
337f048864 | ||
|
|
6f3c421621 | ||
|
|
eadd68d40b | ||
|
|
71202e3cd5 | ||
|
|
75008d8f11 | ||
|
|
2da0ecbe3c | ||
|
|
c7f814b2dc | ||
|
|
13a4a05388 | ||
|
|
20c019ae16 | ||
|
|
d9d6571c73 | ||
|
|
540cad4844 | ||
|
|
0a26b650c0 | ||
|
|
adaac003e5 | ||
|
|
3d4f125071 | ||
|
|
bce87f8717 | ||
|
|
1fe940bd6b | ||
|
|
cb36a71381 | ||
|
|
5acc4928fe | ||
|
|
434493b8aa | ||
|
|
f08b25dbb2 | ||
|
|
3665734972 | ||
|
|
a98d78cdea | ||
|
|
80f6d74e80 | ||
|
|
02d926e9bd | ||
|
|
7749692f72 | ||
|
|
7807cbeb39 | ||
|
|
72f231b327 | ||
|
|
3cbe97d346 | ||
|
|
b880e1a60e | ||
|
|
886046e696 | ||
|
|
9106a5f8ae | ||
|
|
98286336bf | ||
|
|
fa0deededa | ||
|
|
081b001c8b | ||
|
|
c92531a02f | ||
|
|
748a7af602 | ||
|
|
f4a0de6327 | ||
|
|
e405d7af9f | ||
|
|
51cd7fd285 | ||
|
|
aba5f89174 | ||
|
|
5c0f5a1613 | ||
|
|
7c342f7ba2 | ||
|
|
37e2388758 | ||
|
|
05f0492a8d | ||
|
|
c0ac5c6ae8 | ||
|
|
be923687fb | ||
|
|
5f32fb125d | ||
|
|
ae6fbb3146 | ||
|
|
864768635a | ||
|
|
d7c9679977 | ||
|
|
fedfc366f6 | ||
|
|
b3b39626e1 | ||
|
|
4e0ece17b6 | ||
|
|
fd3fdacdee | ||
|
|
a253606d50 | ||
|
|
568d9dc0a3 | ||
|
|
6629b853c5 | ||
|
|
3931cb3235 | ||
|
|
38cd86ad52 | ||
|
|
c0cdabf61d | ||
|
|
51270a96c5 | ||
|
|
84d72c0d5c | ||
|
|
79aca8169a | ||
|
|
b9d362bd62 | ||
|
|
87c4a1bee1 | ||
|
|
c979762b70 | ||
|
|
1d92fc3199 | ||
|
|
8ac7fb1a67 | ||
|
|
60c3d33def | ||
|
|
8a39d3f4eb | ||
|
|
e038767b6f | ||
|
|
0c46b3e481 | ||
|
|
d42f072ff5 | ||
|
|
9b6f29c24a | ||
|
|
873d5dc23f | ||
|
|
6d141fd47f | ||
|
|
c6f6cb2947 | ||
|
|
0eb189ce7f | ||
|
|
f4fd7b7028 | ||
|
|
21de8e0a35 | ||
|
|
6f55d494bd | ||
|
|
d216edc567 | ||
|
|
ec6063ecc4 | ||
|
|
40fe4ce6fb | ||
|
|
31d87a4048 | ||
|
|
ac8b171fa9 | ||
|
|
1f06d78213 | ||
|
|
28eba17df8 | ||
|
|
dfc2e62339 | ||
|
|
80c89a39c9 | ||
|
|
9d1c16e996 | ||
|
|
86604c2353 | ||
|
|
8f31a02938 | ||
|
|
47d375309d | ||
|
|
980265ca97 | ||
|
|
90479fff95 | ||
|
|
1ce1fcb0ce | ||
|
|
1a662376fc | ||
|
|
1d24f926ec | ||
|
|
4f2c37c940 | ||
|
|
042115a6bb | ||
|
|
c9f1469b41 | ||
|
|
54c9f604c9 | ||
|
|
56fbcd6562 | ||
|
|
e6b0500568 | ||
|
|
41038b6673 | ||
|
|
26d03f26c9 | ||
|
|
f3a4e54996 | ||
|
|
925e80bb20 | ||
|
|
9bda09b1a8 | ||
|
|
ef0d0531fa | ||
|
|
6520f20ffe | ||
|
|
ebc4e0924b | ||
|
|
9e7c0e6033 | ||
|
|
cf5720f316 | ||
|
|
655b468269 | ||
|
|
17f8c93e44 | ||
|
|
5b4061b0d5 | ||
|
|
6ce0227e98 | ||
|
|
a583a28850 | ||
|
|
32daf65adc | ||
|
|
e22c80610e | ||
|
|
374f1e7e01 | ||
|
|
d2dfa93bf1 | ||
|
|
fa8c6712c6 | ||
|
|
4c2b84cb4d | ||
|
|
b57c9d569b | ||
|
|
f0e50ba000 | ||
|
|
4a6638f749 | ||
|
|
31577252f3 | ||
|
|
5d71c50080 | ||
|
|
981269d594 | ||
|
|
848db985fc | ||
|
|
d5d8e31447 | ||
|
|
66670a2370 | ||
|
|
5637f349c6 | ||
|
|
93248e1d00 | ||
|
|
187769357f | ||
|
|
5be6422cc8 | ||
|
|
8670b2d994 | ||
|
|
0bc6db428d | ||
|
|
67d565930e | ||
|
|
b2a7ff6fd3 | ||
|
|
425a730d7c | ||
|
|
84c5709722 | ||
|
|
94deec01c9 | ||
|
|
6e0dd4a779 | ||
|
|
14bde340dd | ||
|
|
253765c611 | ||
|
|
2b26d7182f | ||
|
|
61ac83e2d9 | ||
|
|
d5c7b28cad | ||
|
|
959580a708 | ||
|
|
3a5cd17ea3 | ||
|
|
b78981bb9d | ||
|
|
a6d90b0a00 | ||
|
|
67016492f2 | ||
|
|
2c38089527 | ||
|
|
48f68ba6dc | ||
|
|
574df4ba3d | ||
|
|
49ca16d125 | ||
|
|
87525b085e | ||
|
|
6b53c6add3 | ||
|
|
29ca1b7855 | ||
|
|
a42d0c9907 | ||
|
|
8bc6ceaa3d | ||
|
|
0b8a1ab5d1 | ||
|
|
358c287db2 | ||
|
|
2e68453655 | ||
|
|
89b8a9de7d | ||
|
|
c4c2058df9 | ||
|
|
0d85c0085f | ||
|
|
6fa8a8f84f | ||
|
|
a97775bff3 | ||
|
|
32640e054d | ||
|
|
aa42da5658 | ||
|
|
900a94a825 | ||
|
|
c37552de70 | ||
|
|
916b37926c | ||
|
|
2b76c3c15a | ||
|
|
cedd7dde18 | ||
|
|
d088608d8e | ||
|
|
06ee29bb8b | ||
|
|
d255e954d6 | ||
|
|
6a7ab6b8ac | ||
|
|
45b18cc0b1 | ||
|
|
0479431f0a | ||
|
|
ec58dbd791 | ||
|
|
91de68aab3 | ||
|
|
85efc30145 | ||
|
|
0032594f21 | ||
|
|
829fdc5679 | ||
|
|
22e176e329 | ||
|
|
826a70a137 | ||
|
|
dd0ea674af | ||
|
|
a4761b8921 | ||
|
|
3958bb7903 | ||
|
|
83a037a7ce | ||
|
|
a3eb8337a6 | ||
|
|
541072f8e0 | ||
|
|
881248cbd6 | ||
|
|
d4979f5e64 | ||
|
|
4133cd03bb | ||
|
|
9f07c3ca27 | ||
|
|
b20bacb9ed | ||
|
|
97cfbfee1d | ||
|
|
fa7c941792 | ||
|
|
4738879f32 | ||
|
|
d5d88f756a | ||
|
|
65b136bf15 | ||
|
|
bee0b238e4 | ||
|
|
c891168ffb | ||
|
|
6376c2f6aa | ||
|
|
4d9b7cdd61 | ||
|
|
8263d1dd6f | ||
|
|
faf41c0b36 | ||
|
|
27a09c0b2c | ||
|
|
3db7f6a284 | ||
|
|
3bfeb5b5ef | ||
|
|
62a7a555b5 | ||
|
|
d60e99a043 | ||
|
|
77723b34c7 | ||
|
|
c466d34a06 | ||
|
|
f816897833 | ||
|
|
c1e8a5e522 | ||
|
|
76aca32f2e | ||
|
|
7e31b2a795 | ||
|
|
028e38a86b | ||
|
|
8cf7649855 | ||
|
|
64f5119b08 | ||
|
|
4d606aefb3 | ||
|
|
4bafdaa04d | ||
|
|
5afe1abf82 | ||
|
|
f066d50b98 | ||
|
|
91103e21cc | ||
|
|
f44dabcd65 | ||
|
|
0fd2fca231 | ||
|
|
5bb64098e7 | ||
|
|
3fc85e75e0 | ||
|
|
3f61ea16b7 | ||
|
|
4b393092b5 | ||
|
|
b583f5162b |
@@ -1,4 +1,4 @@
|
||||
name: lint
|
||||
name: format
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -12,12 +12,12 @@ on:
|
||||
- "docs/**"
|
||||
|
||||
concurrency:
|
||||
group: build-lint-${{ github.event.pull_request.number || github.ref }}
|
||||
group: build-format-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
autopep8:
|
||||
name: "Formatting lints"
|
||||
ruff-format:
|
||||
name: "Formatting checker"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
@@ -25,7 +25,7 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.10'
|
||||
python-version: "3.10"
|
||||
- name: Setup virtual environment
|
||||
run: |
|
||||
python -m venv .venv
|
||||
@@ -34,11 +34,8 @@ jobs:
|
||||
source .venv/bin/activate
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r dev-requirements.txt
|
||||
- name: autopep8
|
||||
id: autopep8
|
||||
- name: Ruff formatter
|
||||
id: ruff
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
autopep8 --max-line-length 100 --exit-code -r -d --exclude "*_pb2.py" -a -a src/
|
||||
- name: Fail if autopep8 requires changes
|
||||
if: steps.autopep8.outputs.exit-code == 2
|
||||
run: exit 1
|
||||
ruff format --config line-length=100 --diff --exclude "*_pb2.py"
|
||||
3
.github/workflows/publish_test.yaml
vendored
3
.github/workflows/publish_test.yaml
vendored
@@ -9,6 +9,9 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-tags: true
|
||||
fetch-depth: 100
|
||||
- name: Set up Python
|
||||
id: setup_python
|
||||
uses: actions/setup-python@v4
|
||||
|
||||
19
.github/workflows/tests.yaml
vendored
19
.github/workflows/tests.yaml
vendored
@@ -20,21 +20,24 @@ jobs:
|
||||
name: "Unit and Integration Tests"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Checkout repo
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up Python
|
||||
id: setup_python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.10'
|
||||
python-version: "3.10"
|
||||
- name: Cache virtual environment
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
# We are hashing requirements-dev.txt and requirements-extra.txt which
|
||||
# contain all dependencies needed to run the tests and examples.
|
||||
key: venv-${{ runner.os }}-${{ steps.setup_python.outputs.python-version}}-${{ hashFiles('linux-py3.10-requirements.txt') }}-${{ hashFiles('dev-requirements.txt') }}
|
||||
# We are hashing dev-requirements.txt and test-requirements.txt which
|
||||
# contain all dependencies needed to run the tests.
|
||||
key: venv-${{ runner.os }}-${{ steps.setup_python.outputs.python-version}}-${{ hashFiles('dev-requirements.txt') }}-${{ hashFiles('test-requirements.txt') }}
|
||||
path: .venv
|
||||
- name: Install system packages
|
||||
run: sudo apt-get install -y portaudio19-dev
|
||||
id: install_system_packages
|
||||
run: |
|
||||
sudo apt-get install -y portaudio19-dev
|
||||
- name: Setup virtual environment
|
||||
run: |
|
||||
python -m venv .venv
|
||||
@@ -42,8 +45,8 @@ jobs:
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r linux-py3.10-requirements.txt -r dev-requirements.txt
|
||||
pip install -r dev-requirements.txt -r test-requirements.txt
|
||||
- name: Test with pytest
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
pytest --doctest-modules --ignore-glob="*to_be_updated*" src tests
|
||||
pytest --ignore-glob="*to_be_updated*" --ignore-glob=*pipeline_source* src tests
|
||||
|
||||
281
CHANGELOG.md
281
CHANGELOG.md
@@ -5,6 +5,279 @@ All notable changes to **pipecat** will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
|
||||
- Added Google TTS service and corresponding foundational example `07n-interruptible-google.py`
|
||||
|
||||
- Added AWS Polly TTS support and `07m-interruptible-aws.py` as an example.
|
||||
|
||||
- Added InputParams to Azure TTS service.
|
||||
|
||||
- All `FrameProcessors` can now register event handlers.
|
||||
|
||||
```
|
||||
tts = SomeTTSService(...)
|
||||
|
||||
@tts.event_handler("on_connected")
|
||||
async def on_connected(processor):
|
||||
...
|
||||
```
|
||||
|
||||
- Added `AsyncGeneratorProcessor`. This processor can be used together with a
|
||||
`FrameSerializer` as an async generator. It provides a `generator()` function
|
||||
that returns an `AsyncGenerator` and that yields serialized frames.
|
||||
|
||||
- Added `EndTaskFrame` and `CancelTaskFrame`. These are new frames that are
|
||||
meant to be pushed upstream to tell the pipeline task to stop nicely or
|
||||
immediately respectively.
|
||||
|
||||
- Added configurable LLM parameters (e.g., temperature, top_p, max_tokens, seed)
|
||||
for OpenAI, Anthropic, and Together AI services along with corresponding
|
||||
setter functions.
|
||||
|
||||
- Added `sample_rate` as a constructor parameter for TTS services.
|
||||
|
||||
- Pipecat has a pipeline-based architecture. The pipeline consists of frame
|
||||
processors linked to each other. The elements traveling across the pipeline
|
||||
are called frames.
|
||||
|
||||
To have a deterministic behavior the frames traveling through the pipeline
|
||||
should always be ordered, except system frames which are out-of-band
|
||||
frames. To achieve that, each frame processor should only output frames from a
|
||||
single task.
|
||||
|
||||
In this version all the frame processors have their own task to push
|
||||
frames. That is, when `push_frame()` is called the given frame will be put
|
||||
into an internal queue (with the exception of system frames) and a frame
|
||||
processor task will push it out.
|
||||
|
||||
- Added pipeline clocks. A pipeline clock is used by the output transport to
|
||||
know when a frame needs to be presented. For that, all frames now have an
|
||||
optional `pts` field (presentation timestamp). There's currently just one
|
||||
clock implementation `SystemClock` and the `pts` field is currently only used
|
||||
for `TextFrame`s (audio and image frames will be next).
|
||||
|
||||
- A clock can now be specified to `PipelineTask` (defaults to
|
||||
`SystemClock`). This clock will be passed to each frame processor via the
|
||||
`StartFrame`.
|
||||
|
||||
- Added `CartesiaHttpTTSService`.
|
||||
|
||||
- `DailyTransport` now supports setting the audio bitrate to improve audio
|
||||
quality through the `DailyParams.audio_out_bitrate` parameter. The new
|
||||
default is 96kbps.
|
||||
|
||||
- `DailyTransport` now uses the number of audio output channels (1 or 2) to set
|
||||
mono or stereo audio when needed.
|
||||
|
||||
- Interruptions support has been added to `TwilioFrameSerializer` when using
|
||||
`FastAPIWebsocketTransport`.
|
||||
|
||||
- Added new `LmntTTSService` text-to-speech service.
|
||||
(see https://www.lmnt.com/)
|
||||
|
||||
- Added `TTSModelUpdateFrame`, `TTSLanguageUpdateFrame`, `STTModelUpdateFrame`,
|
||||
and `STTLanguageUpdateFrame` frames to allow you to switch models, language
|
||||
and voices in TTS and STT services.
|
||||
|
||||
- Added new `transcriptions.Language` enum.
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated individual update settings frame classes into a single UpdateSettingsFrame
|
||||
class for STT, LLM, and TTS.
|
||||
|
||||
- We now distinguish between input and output audio and image frames. We
|
||||
introduce `InputAudioRawFrame`, `OutputAudioRawFrame`, `InputImageRawFrame`
|
||||
and `OutputImageRawFrame` (and other subclasses of those). The input frames
|
||||
usually come from an input transport and are meant to be processed inside the
|
||||
pipeline to generate new frames. However, the input frames will not be sent
|
||||
through an output transport. The output frames can also be processed by any
|
||||
frame processor in the pipeline and they are allowed to be sent by the output
|
||||
transport.
|
||||
|
||||
- `ParallelTask` has been renamed to `SyncParallelPipeline`. A
|
||||
`SyncParallelPipeline` is a frame processor that contains a list of different
|
||||
pipelines to be executed concurrently. The difference between a
|
||||
`SyncParallelPipeline` and a `ParallelPipeline` is that, given an input frame,
|
||||
the `SyncParallelPipeline` will wait for all the internal pipelines to
|
||||
complete. This is achieved by making sure the last processor in each of the
|
||||
pipelines is synchronous (e.g. an HTTP-based service that waits for the
|
||||
response).
|
||||
|
||||
- `StartFrame` is back to being a system frame so we make sure it's processed immediately
|
||||
by all processors. `EndFrame` stays a control frame since it needs to be
|
||||
ordered allowing the frames in the pipeline to be processed.
|
||||
|
||||
- Updated `MoondreamService` revision to `2024-08-26`.
|
||||
|
||||
- `CartesiaTTSService` and `ElevenLabsTTSService` now add presentation
|
||||
timestamps to their text output. This allows the output transport to push the
|
||||
text frames downstream at almost the same time the words are spoken. We say
|
||||
"almost" because currently the audio frames don't have presentation timestamp
|
||||
but they should be played at roughly the same time.
|
||||
|
||||
- `DailyTransport.on_joined` event now returns the full session data instead of
|
||||
just the participant.
|
||||
|
||||
- `CartesiaTTSService` is now a subclass of `TTSService`.
|
||||
|
||||
- `DeepgramSTTService` is now a subclass of `STTService`.
|
||||
|
||||
- `WhisperSTTService` is now a subclass of `SegmentedSTTService`. A
|
||||
`SegmentedSTTService` is a `STTService` where the provided audio is given in a
|
||||
big chunk (i.e. from when the user starts speaking until the user stops
|
||||
speaking) instead of a continuous stream.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed a `BaseOutputTransport` issue that would stop audio and video rendering
|
||||
tasks (after receiving an `EndFrame`) before the internal queue was emptied,
|
||||
causing the pipeline to finish prematurely.
|
||||
|
||||
- `StartFrame` should be the first frame every processor receives to avoid
|
||||
situations where things are not initialized (because initialization happens on
|
||||
`StartFrame`) and other frames come in resulting in undesired behavior.
|
||||
|
||||
### Performance
|
||||
|
||||
- `obj_id()` and `obj_count()` now use `itertools.count` avoiding the need of
|
||||
`threading.Lock`.
|
||||
|
||||
## [0.0.41] - 2024-08-22
|
||||
|
||||
### Added
|
||||
|
||||
- Added `LivekitFrameSerializer` audio frame serializer.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fix `FastAPIWebsocketOutputTransport` variable name clash with subclass.
|
||||
|
||||
- Fix an `AnthropicLLMService` issue with empty arguments in function calling.
|
||||
|
||||
### Other
|
||||
|
||||
- Fixed `studypal` example errors.
|
||||
|
||||
## [0.0.40] - 2024-08-20
|
||||
|
||||
### Added
|
||||
|
||||
- VAD parameters can now be dynamically updated using the
|
||||
`VADParamsUpdateFrame`.
|
||||
|
||||
- `ErrorFrame` has now a `fatal` field to indicate the bot should exit if a
|
||||
fatal error is pushed upstream (false by default). A new `FatalErrorFrame`
|
||||
that sets this flag to true has been added.
|
||||
|
||||
- `AnthropicLLMService` now supports function calling and initial support for
|
||||
prompt caching.
|
||||
(see https://www.anthropic.com/news/prompt-caching)
|
||||
|
||||
- `ElevenLabsTTSService` can now specify ElevenLabs input parameters such as
|
||||
`output_format`.
|
||||
|
||||
- `TwilioFrameSerializer` can now specify Twilio's and Pipecat's desired sample
|
||||
rates to use.
|
||||
|
||||
- Added new `on_participant_updated` event to `DailyTransport`.
|
||||
|
||||
- Added `DailyRESTHelper.delete_room_by_name()` and
|
||||
`DailyRESTHelper.delete_room_by_url()`.
|
||||
|
||||
- Added LLM and TTS usage metrics. Those are enabled when
|
||||
`PipelineParams.enable_usage_metrics` is True.
|
||||
|
||||
- `AudioRawFrame`s are now pushed downstream from the base output
|
||||
transport. This allows capturing the exact words the bot says by adding an STT
|
||||
service at the end of the pipeline.
|
||||
|
||||
- Added new `GStreamerPipelineSource`. This processor can generate image or
|
||||
audio frames from a GStreamer pipeline (e.g. reading an MP4 file, an RTP
|
||||
stream or anything supported by GStreamer).
|
||||
|
||||
- Added `TransportParams.audio_out_is_live`. This flag is False by default and
|
||||
it is useful to indicate we should not synchronize audio with sporadic images.
|
||||
|
||||
- Added new `BotStartedSpeakingFrame` and `BotStoppedSpeakingFrame` control
|
||||
frames. These frames are pushed upstream and they should wrap
|
||||
`BotSpeakingFrame`.
|
||||
|
||||
- Transports now allow you to register event handlers without decorators.
|
||||
|
||||
### Changed
|
||||
|
||||
- Support RTVI message protocol 0.1. This includes new messages, support for
|
||||
messages responses, support for actions, configuration, webhooks and a bunch
|
||||
of new cool stuff.
|
||||
(see https://docs.rtvi.ai/)
|
||||
|
||||
- `SileroVAD` dependency is now imported via pip's `silero-vad` package.
|
||||
|
||||
- `ElevenLabsTTSService` now uses `eleven_turbo_v2_5` model by default.
|
||||
|
||||
- `BotSpeakingFrame` is now a control frame.
|
||||
|
||||
- `StartFrame` is now a control frame similar to `EndFrame`.
|
||||
|
||||
- `DeepgramTTSService` now is more customizable. You can adjust the encoding and
|
||||
sample rate.
|
||||
|
||||
### Fixed
|
||||
|
||||
- `TTSStartFrame` and `TTSStopFrame` are now sent when TTS really starts and
|
||||
stops. This allows for knowing when the bot starts and stops speaking even
|
||||
with asynchronous services (like Cartesia).
|
||||
|
||||
- Fixed `AzureSTTService` transcription frame timestamps.
|
||||
|
||||
- Fixed an issue with `DailyRESTHelper.create_room()` expirations which would
|
||||
cause this function to stop working after the initial expiration elapsed.
|
||||
|
||||
- Improved `EndFrame` and `CancelFrame` handling. `EndFrame` should end things
|
||||
gracefully while a `CancelFrame` should cancel all running tasks as soon as
|
||||
possible.
|
||||
|
||||
- Fixed an issue in `AIService` that would cause a yielded `None` value to be
|
||||
processed.
|
||||
|
||||
- RTVI's `bot-ready` message is now sent when the RTVI pipeline is ready and
|
||||
a first participant joins.
|
||||
|
||||
- Fixed a `BaseInputTransport` issue that was causing incoming system frames to
|
||||
be queued instead of being pushed immediately.
|
||||
|
||||
- Fixed a `BaseInputTransport` issue that was causing start/stop interruptions
|
||||
incoming frames to not cancel tasks and be processed properly.
|
||||
|
||||
### Other
|
||||
|
||||
- Added `studypal` example (from the Cartesia folks!).
|
||||
|
||||
- Most examples now use Cartesia.
|
||||
|
||||
- Added examples `foundational/19a-tools-anthropic.py`,
|
||||
`foundational/19b-tools-video-anthropic.py` and
|
||||
`foundational/19a-tools-togetherai.py`.
|
||||
|
||||
- Added examples `foundational/18-gstreamer-filesrc.py` and
|
||||
`foundational/18a-gstreamer-videotestsrc.py` that show how to use
|
||||
`GStreamerPipelineSource`
|
||||
|
||||
- Remove `requests` library usage.
|
||||
|
||||
- Cleanup examples and use `DailyRESTHelper`.
|
||||
|
||||
## [0.0.39] - 2024-07-23
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed a regression introduced in 0.0.38 that would cause Daily transcription
|
||||
to stop the Pipeline.
|
||||
|
||||
## [0.0.38] - 2024-07-23
|
||||
|
||||
### Added
|
||||
@@ -97,7 +370,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- It is now possible to specify a Silero VAD version when using `SileroVADAnalyzer`
|
||||
or `SileroVAD`.
|
||||
|
||||
- Added `AysncFrameProcessor` and `AsyncAIService`. Some services like
|
||||
- Added `AysncFrameProcessor` and `AsyncAIService`. Some services like
|
||||
`DeepgramSTTService` need to process things asynchronously. For example, audio
|
||||
is sent to Deepgram but transcriptions are not returned immediately. In these
|
||||
cases we still require all frames (except system frames) to be pushed
|
||||
@@ -114,7 +387,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
- `WhisperSTTService` model can now also be a string.
|
||||
|
||||
- Added missing * keyword separators in services.
|
||||
- Added missing \* keyword separators in services.
|
||||
|
||||
### Fixed
|
||||
|
||||
@@ -191,7 +464,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- Added new `TwilioFrameSerializer`. This is a new serializer that knows how to
|
||||
serialize and deserialize audio frames from Twilio.
|
||||
|
||||
- Added Daily transport event: `on_dialout_answered`. See
|
||||
- Added Daily transport event: `on_dialout_answered`. See
|
||||
https://reference-python.daily.co/api_reference.html#daily.EventHandler
|
||||
|
||||
- Added new `AzureSTTService`. This allows you to use Azure Speech-To-Text.
|
||||
@@ -431,7 +704,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- Added Daily transport support for dial-in use cases.
|
||||
|
||||
- Added Daily transport events: `on_dialout_connected`, `on_dialout_stopped`,
|
||||
`on_dialout_error` and `on_dialout_warning`. See
|
||||
`on_dialout_error` and `on_dialout_warning`. See
|
||||
https://reference-python.daily.co/api_reference.html#daily.EventHandler
|
||||
|
||||
## [0.0.21] - 2024-05-22
|
||||
|
||||
58
README.md
58
README.md
@@ -4,8 +4,7 @@
|
||||
|
||||
# Pipecat
|
||||
|
||||
[](https://pypi.org/project/pipecat-ai) [](https://discord.gg/pipecat)
|
||||
[](https://pypi.org/project/pipecat-ai) [](https://discord.gg/pipecat) <a href="https://app.commanddash.io/agent/github_pipecat-ai_pipecat"><img src="https://img.shields.io/badge/AI-Code%20Agent-EB9FDA"></a>
|
||||
|
||||
`pipecat` is a framework for building voice (and multimodal) conversational agents. Things like personal coaches, meeting assistants, [story-telling toys for kids](https://storytelling-chatbot.fly.dev/), customer support bots, [intake flows](https://www.youtube.com/watch?v=lDevgsp9vn0), and snarky social companions.
|
||||
|
||||
@@ -39,7 +38,7 @@ pip install "pipecat-ai[option,...]"
|
||||
|
||||
Your project may or may not need these, so they're made available as optional requirements. Here is a list:
|
||||
|
||||
- **AI services**: `anthropic`, `azure`, `deepgram`, `gladia`, `google`, `fal`, `moondream`, `openai`, `openpipe`, `playht`, `silero`, `whisper`, `xtts`
|
||||
- **AI services**: `anthropic`, `aws`, `azure`, `deepgram`, `gladia`, `google`, `fal`, `lmnt`, `moondream`, `openai`, `openpipe`, `playht`, `silero`, `whisper`, `xtts`
|
||||
- **Transports**: `local`, `websocket`, `daily`
|
||||
|
||||
## Code examples
|
||||
@@ -49,7 +48,7 @@ Your project may or may not need these, so they're made available as optional re
|
||||
|
||||
## A simple voice agent running locally
|
||||
|
||||
Here is a very basic Pipecat bot that greets a user when they join a real-time session. We'll use [Daily](https://daily.co) for real-time media transport, and [ElevenLabs](https://elevenlabs.io/) for text-to-speech.
|
||||
Here is a very basic Pipecat bot that greets a user when they join a real-time session. We'll use [Daily](https://daily.co) for real-time media transport, and [Cartesia](https://cartesia.ai/) for text-to-speech.
|
||||
|
||||
```python
|
||||
#app.py
|
||||
@@ -61,7 +60,7 @@ from pipecat.frames.frames import EndFrame, TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
async def main():
|
||||
@@ -73,11 +72,10 @@ async def main():
|
||||
bot_name="Bot Name",
|
||||
params=DailyParams(audio_out_enabled=True))
|
||||
|
||||
# Use Eleven Labs for Text-to-Speech
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=...,
|
||||
voice_id=...,
|
||||
# Use Cartesia for Text-to-Speech
|
||||
tts = CartesiaTTSService(
|
||||
api_key=...,
|
||||
voice_id=...
|
||||
)
|
||||
|
||||
# Simple pipeline that will process text to speech and output the result
|
||||
@@ -94,7 +92,7 @@ async def main():
|
||||
@transport.event_handler("on_participant_joined")
|
||||
async def on_new_participant_joined(transport, participant):
|
||||
participant_name = participant["info"]["userName"] or ''
|
||||
# Queue a TextFrame that will get spoken by the TTS service (Eleven Labs)
|
||||
# Queue a TextFrame that will get spoken by the TTS service (Cartesia)
|
||||
await task.queue_frames([TextFrame(f"Hello there, {participant_name}!"), EndFrame()])
|
||||
|
||||
# Run the pipeline task
|
||||
@@ -112,7 +110,6 @@ python app.py
|
||||
|
||||
Daily provides a prebuilt WebRTC user interface. Whilst the app is running, you can visit at `https://<yourdomain>.daily.co/<room_url>` and listen to the bot say hello!
|
||||
|
||||
|
||||
## WebRTC for production use
|
||||
|
||||
WebSockets are fine for server-to-server communication or for initial development. But for production use, you’ll need client-server audio to use a protocol designed for real-time media transport. (For an explanation of the difference between WebSockets and WebRTC, see [this post.](https://www.daily.co/blog/how-to-talk-to-an-llm-with-your-voice/#webrtc))
|
||||
@@ -133,7 +130,6 @@ pip install pipecat-ai[silero]
|
||||
|
||||
The first time you run your bot with Silero, startup may take a while whilst it downloads and caches the model in the background. You can check the progress of this in the console.
|
||||
|
||||
|
||||
## Hacking on the framework itself
|
||||
|
||||
_Note that you may need to set up a virtual environment before following the instructions below. For instance, you might need to run the following from the root of the repo:_
|
||||
@@ -146,20 +142,20 @@ source venv/bin/activate
|
||||
From the root of this repo, run the following:
|
||||
|
||||
```shell
|
||||
pip install -r dev-requirements.txt -r {env}-requirements.txt
|
||||
pip install -r dev-requirements.txt
|
||||
python -m build
|
||||
```
|
||||
|
||||
This builds the package. To use the package locally (eg to run sample files), run
|
||||
This builds the package. To use the package locally (e.g. to run sample files), run
|
||||
|
||||
```shell
|
||||
pip install --editable .
|
||||
pip install --editable ".[option,...]"
|
||||
```
|
||||
|
||||
If you want to use this package from another directory, you can run:
|
||||
|
||||
```shell
|
||||
pip install path_to_this_repo
|
||||
pip install "path_to_this_repo[option,...]"
|
||||
```
|
||||
|
||||
### Running tests
|
||||
@@ -167,27 +163,29 @@ pip install path_to_this_repo
|
||||
From the root directory, run:
|
||||
|
||||
```shell
|
||||
pytest --doctest-modules --ignore-glob="*to_be_updated*" src tests
|
||||
pytest --doctest-modules --ignore-glob="*to_be_updated*" --ignore-glob=*pipeline_source* src tests
|
||||
```
|
||||
|
||||
## Setting up your editor
|
||||
|
||||
This project uses strict [PEP 8](https://peps.python.org/pep-0008/) formatting.
|
||||
This project uses strict [PEP 8](https://peps.python.org/pep-0008/) formatting via [Ruff](https://github.com/astral-sh/ruff).
|
||||
|
||||
### Emacs
|
||||
|
||||
You can use [use-package](https://github.com/jwiegley/use-package) to install [py-autopep8](https://codeberg.org/ideasman42/emacs-py-autopep8) package and configure `autopep8` arguments:
|
||||
You can use [use-package](https://github.com/jwiegley/use-package) to install [emacs-lazy-ruff](https://github.com/christophermadsen/emacs-lazy-ruff) package and configure `ruff` arguments:
|
||||
|
||||
```elisp
|
||||
(use-package py-autopep8
|
||||
(use-package lazy-ruff
|
||||
:ensure t
|
||||
:defer t
|
||||
:hook ((python-mode . py-autopep8-mode))
|
||||
:hook ((python-mode . lazy-ruff-mode))
|
||||
:config
|
||||
(setq py-autopep8-options '("-a" "-a", "--max-line-length=100")))
|
||||
(setq lazy-ruff-format-command "ruff format --config line-length=100")
|
||||
(setq lazy-ruff-only-format-block t)
|
||||
(setq lazy-ruff-only-format-region t)
|
||||
(setq lazy-ruff-only-format-buffer t))
|
||||
```
|
||||
|
||||
`autopep8` was installed in the `venv` environment described before, so you should be able to use [pyvenv-auto](https://github.com/ryotaro612/pyvenv-auto) to automatically load that environment inside Emacs.
|
||||
`ruff` was installed in the `venv` environment described before, so you should be able to use [pyvenv-auto](https://github.com/ryotaro612/pyvenv-auto) to automatically load that environment inside Emacs.
|
||||
|
||||
```elisp
|
||||
(use-package pyvenv-auto
|
||||
@@ -200,18 +198,14 @@ You can use [use-package](https://github.com/jwiegley/use-package) to install [p
|
||||
### Visual Studio Code
|
||||
|
||||
Install the
|
||||
[autopep8](https://marketplace.visualstudio.com/items?itemName=ms-python.autopep8) extension. Then edit the user settings (_Ctrl-Shift-P_ `Open User Settings (JSON)`) and set it as the default Python formatter, enable formatting on save and configure `autopep8` arguments:
|
||||
[Ruff](https://marketplace.visualstudio.com/items?itemName=charliermarsh.ruff) extension. Then edit the user settings (_Ctrl-Shift-P_ `Open User Settings (JSON)`) and set it as the default Python formatter, enable formatting on save and configure `ruff` arguments:
|
||||
|
||||
```json
|
||||
"[python]": {
|
||||
"editor.defaultFormatter": "ms-python.autopep8",
|
||||
"editor.defaultFormatter": "charliermarsh.ruff",
|
||||
"editor.formatOnSave": true
|
||||
},
|
||||
"autopep8.args": [
|
||||
"-a",
|
||||
"-a",
|
||||
"--max-line-length=100"
|
||||
],
|
||||
"ruff.format.args": ["--config", "line-length=100"]
|
||||
```
|
||||
|
||||
## Getting help
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
autopep8~=2.1.0
|
||||
build~=1.2.1
|
||||
grpcio-tools~=1.62.2
|
||||
pip-tools~=7.4.1
|
||||
pyright~=1.1.367
|
||||
pytest~=8.2.0
|
||||
setuptools~=69.5.1
|
||||
pyright~=1.1.376
|
||||
pytest~=8.3.2
|
||||
ruff~=0.6.7
|
||||
setuptools~=72.2.0
|
||||
setuptools_scm~=8.1.0
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
# Anthropic
|
||||
ANTHROPIC_API_KEY=...
|
||||
|
||||
# AWS
|
||||
AWS_SECRET_ACCESS_KEY=...
|
||||
AWS_ACCESS_KEY_ID=...
|
||||
AWS_REGION=...
|
||||
|
||||
# Azure
|
||||
AZURE_SPEECH_REGION=...
|
||||
AZURE_SPEECH_API_KEY=...
|
||||
@@ -30,6 +35,10 @@ FIREWORKS_API_KEY=...
|
||||
# Gladia
|
||||
GLADIA_API_KEY=...
|
||||
|
||||
# LMNT
|
||||
LMNT_API_KEY=...
|
||||
LMNT_VOICE_ID=...
|
||||
|
||||
# PlayHT
|
||||
PLAY_HT_USER_ID=...
|
||||
PLAY_HT_API_KEY=...
|
||||
|
||||
@@ -41,6 +41,7 @@ Next, follow the steps in the README for each demo.
|
||||
| [Patient intake](patient-intake) | A chatbot that can call functions in response to user input. | Deepgram, ElevenLabs, OpenAI, Daily, Daily Prebuilt UI |
|
||||
| [Dialin Chatbot](dialin-chatbot) | A chatbot that connects to an incoming phone call from Daily or Twilio. | Deepgram, ElevenLabs, OpenAI, Daily, Twilio |
|
||||
| [Twilio Chatbot](twilio-chatbot) | A chatbot that connects to an incoming phone call from Twilio. | Deepgram, ElevenLabs, OpenAI, Daily, Twilio |
|
||||
| [studypal](studypal) | A chatbot to have a conversation about any article on the web | |
|
||||
|
||||
> [!IMPORTANT]
|
||||
> These example projects use Daily as a WebRTC transport and can be joined using their hosted Prebuilt UI.
|
||||
|
||||
@@ -9,8 +9,5 @@ COPY *.py .
|
||||
COPY ./requirements.txt requirements.txt
|
||||
RUN pip3 install --no-cache-dir --upgrade -r requirements.txt
|
||||
|
||||
# Install models
|
||||
RUN python3 install_deps.py
|
||||
|
||||
# Start the FastAPI server
|
||||
CMD python3 bot_runner.py --port ${FAST_API_PORT}
|
||||
@@ -2,8 +2,6 @@
|
||||
|
||||
This project modifies the `bot_runner.py` server to launch a new machine for each user session. This is a recommended approach for production vs. running shell processes, as your deployment will otherwise quickly run out of system resources under load.
|
||||
|
||||
To speed up machine boot times, we also download and cache Silero VAD as part of the Dockerfile (`install_deps.py`). If you are using other custom models, you can add them here too.
|
||||
|
||||
For this example, we are using Daily as a WebRTC transport and provisioning a new room and token for each session. You can use another transport, such as WebSockets, by modifying the `bot.py` and `bot_runner.py` files accordingly.
|
||||
|
||||
## Setting up your fly.io deployment
|
||||
@@ -14,7 +12,7 @@ You can copy the `example-fly.toml` as a reference. Be sure to change the app na
|
||||
|
||||
### Create your .env file
|
||||
|
||||
Copy the base `env.example` to `.env` and enter the necessary API keys.
|
||||
Copy the base `env.example` to `.env` and enter the necessary API keys.
|
||||
|
||||
`FLY_APP_NAME` should match that in the `fly.toml` file.
|
||||
|
||||
@@ -32,7 +30,6 @@ Note: you can do this manually via the fly.io dashboard under the "secrets" sub-
|
||||
|
||||
`fly deploy`
|
||||
|
||||
|
||||
## Connecting to your bot
|
||||
|
||||
Send a post request to your running fly.io instance:
|
||||
@@ -40,4 +37,3 @@ Send a post request to your running fly.io instance:
|
||||
`curl --location --request POST 'https://YOUR_FLY_APP_NAME/start_bot'`
|
||||
|
||||
This request will wait until the machine enters the `started` state before returning a room URL and token to join.
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
@@ -7,7 +6,10 @@ import argparse
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import LLMAssistantResponseAggregator, LLMUserResponseAggregator
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.frames.frames import LLMMessagesFrame, EndFrame
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
@@ -17,6 +19,7 @@ from pipecat.vad.silero import SileroVADAnalyzer
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -27,71 +30,69 @@ daily_api_url = os.getenv("DAILY_API_URL", "https://api.daily.co/v1")
|
||||
|
||||
|
||||
async def main(room_url: str, token: str):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
DailyParams(
|
||||
api_url=daily_api_url,
|
||||
api_key=daily_api_key,
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
)
|
||||
)
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
DailyParams(
|
||||
api_url=daily_api_url,
|
||||
api_key=daily_api_key,
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
)
|
||||
tts = ElevenLabsTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are Chatbot, a friendly, helpful robot. Your output will be converted to audio so don't include special characters other than '!' or '?' in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by saying hello.",
|
||||
},
|
||||
]
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are Chatbot, a friendly, helpful robot. Your output will be converted to audio so don't include special characters other than '!' or '?' in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by saying hello.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
tma_in,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
])
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
@transport.event_handler("on_call_state_updated")
|
||||
async def on_call_state_updated(transport, state):
|
||||
if state == "left":
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
@transport.event_handler("on_call_state_updated")
|
||||
async def on_call_state_updated(transport, state):
|
||||
if state == "left":
|
||||
await task.queue_frame(EndFrame())
|
||||
runner = PipelineRunner()
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,15 +1,29 @@
|
||||
import os
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import subprocess
|
||||
import requests
|
||||
import os
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomObject, DailyRoomProperties, DailyRoomParams
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import (
|
||||
DailyRESTHelper,
|
||||
DailyRoomObject,
|
||||
DailyRoomProperties,
|
||||
DailyRoomParams,
|
||||
)
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
@@ -17,88 +31,95 @@ load_dotenv(override=True)
|
||||
|
||||
MAX_SESSION_TIME = 5 * 60 # 5 minutes
|
||||
REQUIRED_ENV_VARS = [
|
||||
'DAILY_API_KEY',
|
||||
'OPENAI_API_KEY',
|
||||
'ELEVENLABS_API_KEY',
|
||||
'ELEVENLABS_VOICE_ID',
|
||||
'FLY_API_KEY',
|
||||
'FLY_APP_NAME',]
|
||||
"DAILY_API_KEY",
|
||||
"OPENAI_API_KEY",
|
||||
"ELEVENLABS_API_KEY",
|
||||
"ELEVENLABS_VOICE_ID",
|
||||
"FLY_API_KEY",
|
||||
"FLY_APP_NAME",
|
||||
]
|
||||
|
||||
FLY_API_HOST = os.getenv("FLY_API_HOST", "https://api.machines.dev/v1")
|
||||
FLY_APP_NAME = os.getenv("FLY_APP_NAME", "pipecat-fly-example")
|
||||
FLY_API_KEY = os.getenv("FLY_API_KEY", "")
|
||||
FLY_HEADERS = {
|
||||
'Authorization': f"Bearer {FLY_API_KEY}",
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
FLY_HEADERS = {"Authorization": f"Bearer {FLY_API_KEY}", "Content-Type": "application/json"}
|
||||
|
||||
daily_rest_helper = DailyRESTHelper(
|
||||
os.getenv("DAILY_API_KEY", ""),
|
||||
os.getenv("DAILY_API_URL", 'https://api.daily.co/v1'))
|
||||
daily_helpers = {}
|
||||
|
||||
|
||||
# ----------------- API ----------------- #
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
aiohttp_session = aiohttp.ClientSession()
|
||||
daily_helpers["rest"] = DailyRESTHelper(
|
||||
daily_api_key=os.getenv("DAILY_API_KEY", ""),
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
yield
|
||||
await aiohttp_session.close()
|
||||
|
||||
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"]
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# ----------------- Main ----------------- #
|
||||
|
||||
|
||||
def spawn_fly_machine(room_url: str, token: str):
|
||||
# Use the same image as the bot runner
|
||||
res = requests.get(f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines", headers=FLY_HEADERS)
|
||||
if res.status_code != 200:
|
||||
raise Exception(f"Unable to get machine info from Fly: {res.text}")
|
||||
image = res.json()[0]['config']['image']
|
||||
async def spawn_fly_machine(room_url: str, token: str):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
# Use the same image as the bot runner
|
||||
async with session.get(
|
||||
f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines", headers=FLY_HEADERS
|
||||
) as r:
|
||||
if r.status != 200:
|
||||
text = await r.text()
|
||||
raise Exception(f"Unable to get machine info from Fly: {text}")
|
||||
|
||||
# Machine configuration
|
||||
cmd = f"python3 bot.py -u {room_url} -t {token}"
|
||||
cmd = cmd.split()
|
||||
worker_props = {
|
||||
"config": {
|
||||
"image": image,
|
||||
"auto_destroy": True,
|
||||
"init": {
|
||||
"cmd": cmd
|
||||
data = await r.json()
|
||||
image = data[0]["config"]["image"]
|
||||
|
||||
# Machine configuration
|
||||
cmd = f"python3 bot.py -u {room_url} -t {token}"
|
||||
cmd = cmd.split()
|
||||
worker_props = {
|
||||
"config": {
|
||||
"image": image,
|
||||
"auto_destroy": True,
|
||||
"init": {"cmd": cmd},
|
||||
"restart": {"policy": "no"},
|
||||
"guest": {"cpu_kind": "shared", "cpus": 1, "memory_mb": 1024},
|
||||
},
|
||||
"restart": {
|
||||
"policy": "no"
|
||||
},
|
||||
"guest": {
|
||||
"cpu_kind": "shared",
|
||||
"cpus": 1,
|
||||
"memory_mb": 1024
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
}
|
||||
# Spawn a new machine instance
|
||||
async with session.post(
|
||||
f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines", headers=FLY_HEADERS, json=worker_props
|
||||
) as r:
|
||||
if r.status != 200:
|
||||
text = await r.text()
|
||||
raise Exception(f"Problem starting a bot worker: {text}")
|
||||
|
||||
# Spawn a new machine instance
|
||||
res = requests.post(
|
||||
f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines",
|
||||
headers=FLY_HEADERS,
|
||||
json=worker_props)
|
||||
data = await r.json()
|
||||
# Wait for the machine to enter the started state
|
||||
vm_id = data["id"]
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(f"Problem starting a bot worker: {res.text}")
|
||||
|
||||
# Wait for the machine to enter the started state
|
||||
vm_id = res.json()['id']
|
||||
|
||||
res = requests.get(
|
||||
f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines/{vm_id}/wait?state=started",
|
||||
headers=FLY_HEADERS)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(f"Bot was unable to enter started state: {res.text}")
|
||||
async with session.get(
|
||||
f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines/{vm_id}/wait?state=started",
|
||||
headers=FLY_HEADERS,
|
||||
) as r:
|
||||
if r.status != 200:
|
||||
text = await r.text()
|
||||
raise Exception(f"Bot was unable to enter started state: {text}")
|
||||
|
||||
print(f"Machine joined room: {room_url}")
|
||||
|
||||
@@ -117,29 +138,23 @@ async def start_bot(request: Request) -> JSONResponse:
|
||||
room_url = os.getenv("DAILY_SAMPLE_ROOM_URL", "")
|
||||
|
||||
if not room_url:
|
||||
params = DailyRoomParams(
|
||||
properties=DailyRoomProperties()
|
||||
)
|
||||
params = DailyRoomParams(properties=DailyRoomProperties())
|
||||
try:
|
||||
room: DailyRoomObject = daily_rest_helper.create_room(params=params)
|
||||
room: DailyRoomObject = await daily_helpers["rest"].create_room(params=params)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Unable to provision room {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Unable to provision room {e}")
|
||||
else:
|
||||
# Check passed room URL exists, we should assume that it already has a sip set up
|
||||
try:
|
||||
room: DailyRoomObject = daily_rest_helper.get_room_from_url(room_url)
|
||||
room: DailyRoomObject = await daily_helpers["rest"].get_room_from_url(room_url)
|
||||
except Exception:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Room not found: {room_url}")
|
||||
raise HTTPException(status_code=500, detail=f"Room not found: {room_url}")
|
||||
|
||||
# Give the agent a token to join the session
|
||||
token = daily_rest_helper.get_token(room.url, MAX_SESSION_TIME)
|
||||
token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)
|
||||
|
||||
if not room or not token:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to get token for room: {room_url}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room_url}")
|
||||
|
||||
# Launch a new fly.io machine, or run as a shell process (not recommended)
|
||||
run_as_process = os.getenv("RUN_AS_PROCESS", False)
|
||||
@@ -150,24 +165,26 @@ async def start_bot(request: Request) -> JSONResponse:
|
||||
[f"python3 -m bot -u {room.url} -t {token}"],
|
||||
shell=True,
|
||||
bufsize=1,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__)))
|
||||
cwd=os.path.dirname(os.path.abspath(__file__)),
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
else:
|
||||
try:
|
||||
spawn_fly_machine(room.url, token)
|
||||
await spawn_fly_machine(room.url, token)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to spawn VM: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to spawn VM: {e}")
|
||||
|
||||
# Grab a token for the user to join with
|
||||
user_token = daily_rest_helper.get_token(room.url, MAX_SESSION_TIME)
|
||||
user_token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)
|
||||
|
||||
return JSONResponse(
|
||||
{
|
||||
"room_url": room.url,
|
||||
"token": user_token,
|
||||
}
|
||||
)
|
||||
|
||||
return JSONResponse({
|
||||
"room_url": room.url,
|
||||
"token": user_token,
|
||||
})
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Check environment variables
|
||||
@@ -176,24 +193,19 @@ if __name__ == "__main__":
|
||||
raise Exception(f"Missing environment variable: {env_var}.")
|
||||
|
||||
parser = argparse.ArgumentParser(description="Pipecat Bot Runner")
|
||||
parser.add_argument("--host", type=str,
|
||||
default=os.getenv("HOST", "0.0.0.0"), help="Host address")
|
||||
parser.add_argument("--port", type=int,
|
||||
default=os.getenv("PORT", 7860), help="Port number")
|
||||
parser.add_argument("--reload", action="store_true",
|
||||
default=False, help="Reload code on change")
|
||||
parser.add_argument(
|
||||
"--host", type=str, default=os.getenv("HOST", "0.0.0.0"), help="Host address"
|
||||
)
|
||||
parser.add_argument("--port", type=int, default=os.getenv("PORT", 7860), help="Port number")
|
||||
parser.add_argument(
|
||||
"--reload", action="store_true", default=False, help="Reload code on change"
|
||||
)
|
||||
|
||||
config = parser.parse_args()
|
||||
|
||||
try:
|
||||
import uvicorn
|
||||
|
||||
uvicorn.run(
|
||||
"bot_runner:app",
|
||||
host=config.host,
|
||||
port=config.port,
|
||||
reload=config.reload
|
||||
)
|
||||
|
||||
uvicorn.run("bot_runner:app", host=config.host, port=config.port, reload=config.reload)
|
||||
except KeyboardInterrupt:
|
||||
print("Pipecat runner shutting down...")
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
import torch
|
||||
|
||||
# Download (cache) the Silero VAD model
|
||||
torch.hub.load(repo_or_dir='snakers4/silero-vad', model='silero_vad', force_reload=True)
|
||||
@@ -1,6 +1,5 @@
|
||||
pipecat-ai[daily,openai,silero]
|
||||
fastapi
|
||||
uvicorn
|
||||
requests
|
||||
python-dotenv
|
||||
loguru
|
||||
loguru
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
@@ -7,11 +6,11 @@ import argparse
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import LLMAssistantResponseAggregator, LLMUserResponseAggregator
|
||||
from pipecat.frames.frames import (
|
||||
LLMMessagesFrame,
|
||||
EndFrame
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.frames.frames import LLMMessagesFrame, EndFrame
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport, DailyDialinSettings
|
||||
@@ -19,6 +18,7 @@ from pipecat.vad.silero import SileroVADAnalyzer
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -29,75 +29,70 @@ daily_api_url = os.getenv("DAILY_API_URL", "https://api.daily.co/v1")
|
||||
|
||||
|
||||
async def main(room_url: str, token: str, callId: str, callDomain: str):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
# diallin_settings are only needed if Daily's SIP URI is used
|
||||
# If you are handling this via Twilio, Telnyx, set this to None
|
||||
# and handle call-forwarding when on_dialin_ready fires.
|
||||
diallin_settings = DailyDialinSettings(
|
||||
call_id=callId,
|
||||
call_domain=callDomain
|
||||
)
|
||||
# diallin_settings are only needed if Daily's SIP URI is used
|
||||
# If you are handling this via Twilio, Telnyx, set this to None
|
||||
# and handle call-forwarding when on_dialin_ready fires.
|
||||
diallin_settings = DailyDialinSettings(call_id=callId, call_domain=callDomain)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
DailyParams(
|
||||
api_url=daily_api_url,
|
||||
api_key=daily_api_key,
|
||||
dialin_settings=diallin_settings,
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
)
|
||||
)
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
DailyParams(
|
||||
api_url=daily_api_url,
|
||||
api_key=daily_api_key,
|
||||
dialin_settings=diallin_settings,
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
)
|
||||
tts = ElevenLabsTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by saying 'Oh, hello! Who dares dial me at this hour?!'.",
|
||||
},
|
||||
]
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by saying 'Oh, hello! Who dares dial me at this hour?!'.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
tma_in,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
])
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -6,40 +6,62 @@ provisioning a room and starting a Pipecat bot in response.
|
||||
|
||||
Refer to README for more information.
|
||||
"""
|
||||
|
||||
import aiohttp
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomObject, DailyRoomProperties, DailyRoomSipParams, DailyRoomParams
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse, PlainTextResponse
|
||||
from twilio.twiml.voice_response import VoiceResponse
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import (
|
||||
DailyRESTHelper,
|
||||
DailyRoomObject,
|
||||
DailyRoomProperties,
|
||||
DailyRoomSipParams,
|
||||
DailyRoomParams,
|
||||
)
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# ------------ Configuration ------------ #
|
||||
|
||||
MAX_SESSION_TIME = 5 * 60 # 5 minutes
|
||||
REQUIRED_ENV_VARS = ['OPENAI_API_KEY', 'DAILY_API_KEY',
|
||||
'ELEVENLABS_API_KEY', 'ELEVENLABS_VOICE_ID']
|
||||
|
||||
daily_rest_helper = DailyRESTHelper(
|
||||
os.getenv("DAILY_API_KEY", ""),
|
||||
os.getenv("DAILY_API_URL", 'https://api.daily.co/v1'))
|
||||
REQUIRED_ENV_VARS = ["OPENAI_API_KEY", "DAILY_API_KEY", "ELEVENLABS_API_KEY", "ELEVENLABS_VOICE_ID"]
|
||||
|
||||
daily_helpers = {}
|
||||
|
||||
# ----------------- API ----------------- #
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
aiohttp_session = aiohttp.ClientSession()
|
||||
daily_helpers["rest"] = DailyRESTHelper(
|
||||
daily_api_key=os.getenv("DAILY_API_KEY", ""),
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
yield
|
||||
await aiohttp_session.close()
|
||||
|
||||
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"]
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
"""
|
||||
@@ -53,61 +75,51 @@ action using the Twilio Client library.
|
||||
"""
|
||||
|
||||
|
||||
def _create_daily_room(room_url, callId, callDomain=None, vendor="daily"):
|
||||
async def _create_daily_room(room_url, callId, callDomain=None, vendor="daily"):
|
||||
if not room_url:
|
||||
params = DailyRoomParams(
|
||||
properties=DailyRoomProperties(
|
||||
# Note: these are the default values, except for the display name
|
||||
sip=DailyRoomSipParams(
|
||||
display_name="dialin-user",
|
||||
video=False,
|
||||
sip_mode="dial-in",
|
||||
num_endpoints=1
|
||||
display_name="dialin-user", video=False, sip_mode="dial-in", num_endpoints=1
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
print(f"Creating new room...")
|
||||
room: DailyRoomObject = daily_rest_helper.create_room(params=params)
|
||||
room: DailyRoomObject = await daily_helpers["rest"].create_room(params=params)
|
||||
|
||||
else:
|
||||
# Check passed room URL exist (we assume that it already has a sip set up!)
|
||||
try:
|
||||
print(f"Joining existing room: {room_url}")
|
||||
room: DailyRoomObject = daily_rest_helper.get_room_from_url(
|
||||
room_url)
|
||||
room: DailyRoomObject = await daily_helpers["rest"].get_room_from_url(room_url)
|
||||
except Exception:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Room not found: {room_url}")
|
||||
raise HTTPException(status_code=500, detail=f"Room not found: {room_url}")
|
||||
|
||||
print(f"Daily room: {room.url} {room.config.sip_endpoint}")
|
||||
|
||||
# Give the agent a token to join the session
|
||||
token = daily_rest_helper.get_token(room.url, MAX_SESSION_TIME)
|
||||
token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)
|
||||
|
||||
if not room or not token:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to get room or token token")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to get room or token token")
|
||||
|
||||
# Spawn a new agent, and join the user session
|
||||
# Note: this is mostly for demonstration purposes (refer to 'deployment' in docs)
|
||||
if vendor == "daily":
|
||||
bot_proc = f"python3 -m bot_daily -u {room.url} -t {token} -i {
|
||||
callId} -d {callDomain}"
|
||||
bot_proc = f"python3 - m bot_daily - u {room.url} - t {token} - i {
|
||||
callId} - d {callDomain}"
|
||||
else:
|
||||
bot_proc = f"python3 -m bot_twilio -u {room.url} -t {
|
||||
token} -i {callId} -s {room.config.sip_endpoint}"
|
||||
bot_proc = f"python3 - m bot_twilio - u {room.url} - t {
|
||||
token} - i {callId} - s {room.config.sip_endpoint}"
|
||||
|
||||
try:
|
||||
subprocess.Popen(
|
||||
[bot_proc],
|
||||
shell=True,
|
||||
bufsize=1,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__))
|
||||
[bot_proc], shell=True, bufsize=1, cwd=os.path.dirname(os.path.abspath(__file__))
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
|
||||
return room
|
||||
|
||||
@@ -130,18 +142,16 @@ async def twilio_start_bot(request: Request):
|
||||
pass
|
||||
|
||||
room_url = os.getenv("DAILY_SAMPLE_ROOM_URL", None)
|
||||
callId = data.get('CallSid')
|
||||
callId = data.get("CallSid")
|
||||
|
||||
if not callId:
|
||||
raise HTTPException(
|
||||
status_code=500, detail="Missing 'CallSid' in request")
|
||||
raise HTTPException(status_code=500, detail="Missing 'CallSid' in request")
|
||||
|
||||
print("CallId: %s" % callId)
|
||||
|
||||
# create room and tell the bot to join the created room
|
||||
# note: Twilio does not require a callDomain
|
||||
room: DailyRoomObject = _create_daily_room(
|
||||
room_url, callId, None, "twilio")
|
||||
room: DailyRoomObject = await _create_daily_room(room_url, callId, None, "twilio")
|
||||
|
||||
print(f"Put Twilio on hold...")
|
||||
# We have the room and the SIP URI,
|
||||
@@ -151,7 +161,8 @@ async def twilio_start_bot(request: Request):
|
||||
# http://com.twilio.music.classical.s3.amazonaws.com/BusyStrings.mp3
|
||||
resp = VoiceResponse()
|
||||
resp.play(
|
||||
url="http://com.twilio.sounds.music.s3.amazonaws.com/MARKOVICHAMP-Borghestral.mp3", loop=10)
|
||||
url="http://com.twilio.sounds.music.s3.amazonaws.com/MARKOVICHAMP-Borghestral.mp3", loop=10
|
||||
)
|
||||
return str(resp)
|
||||
|
||||
|
||||
@@ -173,19 +184,14 @@ async def daily_start_bot(request: Request) -> JSONResponse:
|
||||
callId = data.get("callId", None)
|
||||
callDomain = data.get("callDomain", None)
|
||||
except Exception:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Missing properties 'callId' or 'callDomain'")
|
||||
raise HTTPException(status_code=500, detail="Missing properties 'callId' or 'callDomain'")
|
||||
|
||||
print(f"CallId: {callId}, CallDomain: {callDomain}")
|
||||
room: DailyRoomObject = _create_daily_room(
|
||||
room_url, callId, callDomain, "daily")
|
||||
room: DailyRoomObject = await _create_daily_room(room_url, callId, callDomain, "daily")
|
||||
|
||||
# Grab a token for the user to join with
|
||||
return JSONResponse({
|
||||
"room_url": room.url,
|
||||
"sipUri": room.config.sip_endpoint
|
||||
})
|
||||
return JSONResponse({"room_url": room.url, "sipUri": room.config.sip_endpoint})
|
||||
|
||||
|
||||
# ----------------- Main ----------------- #
|
||||
|
||||
@@ -197,24 +203,18 @@ if __name__ == "__main__":
|
||||
raise Exception(f"Missing environment variable: {env_var}.")
|
||||
|
||||
parser = argparse.ArgumentParser(description="Pipecat Bot Runner")
|
||||
parser.add_argument("--host", type=str,
|
||||
default=os.getenv("HOST", "0.0.0.0"), help="Host address")
|
||||
parser.add_argument("--port", type=int,
|
||||
default=os.getenv("PORT", 7860), help="Port number")
|
||||
parser.add_argument("--reload", action="store_true",
|
||||
default=True, help="Reload code on change")
|
||||
parser.add_argument(
|
||||
"--host", type=str, default=os.getenv("HOST", "0.0.0.0"), help="Host address"
|
||||
)
|
||||
parser.add_argument("--port", type=int, default=os.getenv("PORT", 7860), help="Port number")
|
||||
parser.add_argument("--reload", action="store_true", default=True, help="Reload code on change")
|
||||
|
||||
config = parser.parse_args()
|
||||
|
||||
try:
|
||||
import uvicorn
|
||||
|
||||
uvicorn.run(
|
||||
"bot_runner:app",
|
||||
host=config.host,
|
||||
port=config.port,
|
||||
reload=config.reload
|
||||
)
|
||||
uvicorn.run("bot_runner:app", host=config.host, port=config.port, reload=config.reload)
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print("Pipecat runner shutting down...")
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
@@ -7,11 +6,11 @@ import argparse
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import LLMAssistantResponseAggregator, LLMUserResponseAggregator
|
||||
from pipecat.frames.frames import (
|
||||
LLMMessagesFrame,
|
||||
EndFrame
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.frames.frames import LLMMessagesFrame, EndFrame
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
@@ -22,96 +21,95 @@ from twilio.rest import Client
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
twilio_account_sid = os.getenv('TWILIO_ACCOUNT_SID')
|
||||
twilio_auth_token = os.getenv('TWILIO_AUTH_TOKEN')
|
||||
twilio_account_sid = os.getenv("TWILIO_ACCOUNT_SID")
|
||||
twilio_auth_token = os.getenv("TWILIO_AUTH_TOKEN")
|
||||
twilioclient = Client(twilio_account_sid, twilio_auth_token)
|
||||
|
||||
daily_api_key = os.getenv("DAILY_API_KEY", "")
|
||||
|
||||
|
||||
async def main(room_url: str, token: str, callId: str, sipUri: str):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
# diallin_settings are only needed if Daily's SIP URI is used
|
||||
# If you are handling this via Twilio, Telnyx, set this to None
|
||||
# and handle call-forwarding when on_dialin_ready fires.
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
DailyParams(
|
||||
api_key=daily_api_key,
|
||||
dialin_settings=None, # Not required for Twilio
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
)
|
||||
)
|
||||
# dialin_settings are only needed if Daily's SIP URI is used
|
||||
# If you are handling this via Twilio, Telnyx, set this to None
|
||||
# and handle call-forwarding when on_dialin_ready fires.
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
DailyParams(
|
||||
api_key=daily_api_key,
|
||||
dialin_settings=None, # Not required for Twilio
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
)
|
||||
tts = ElevenLabsTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by saying 'Hello! Who dares dial me at this hour?!'.",
|
||||
},
|
||||
]
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by saying 'Hello! Who dares dial me at this hour?!'.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
tma_in,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
])
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.queue_frame(EndFrame())
|
||||
|
||||
@transport.event_handler("on_dialin_ready")
|
||||
async def on_dialin_ready(transport, cdata):
|
||||
# For Twilio, Telnyx, etc. You need to update the state of the call
|
||||
# and forward it to the sip_uri..
|
||||
print(f"Forwarding call: {callId} {sipUri}")
|
||||
@transport.event_handler("on_dialin_ready")
|
||||
async def on_dialin_ready(transport, cdata):
|
||||
# For Twilio, Telnyx, etc. You need to update the state of the call
|
||||
# and forward it to the sip_uri..
|
||||
print(f"Forwarding call: {callId} {sipUri}")
|
||||
|
||||
try:
|
||||
# The TwiML is updated using Twilio's client library
|
||||
call = twilioclient.calls(callId).update(
|
||||
twiml=f'<Response><Dial><Sip>{sipUri}</Sip></Dial></Response>'
|
||||
)
|
||||
except Exception as e:
|
||||
raise Exception(f"Failed to forward call: {str(e)}")
|
||||
try:
|
||||
# The TwiML is updated using Twilio's client library
|
||||
call = twilioclient.calls(callId).update(
|
||||
twiml=f"<Response><Dial><Sip>{sipUri}</Sip></Dial></Response>"
|
||||
)
|
||||
except Exception as e:
|
||||
raise Exception(f"Failed to forward call: {str(e)}")
|
||||
|
||||
runner = PipelineRunner()
|
||||
await runner.run(task)
|
||||
runner = PipelineRunner()
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
pipecat-ai[daily,openai,silero]
|
||||
pipecat-ai[daily,elevenlabs,openai,silero]
|
||||
fastapi
|
||||
uvicorn
|
||||
requests
|
||||
python-dotenv
|
||||
loguru
|
||||
twilio
|
||||
twilio
|
||||
python-multipart
|
||||
|
||||
@@ -13,7 +13,7 @@ from pipecat.frames.frames import EndFrame, TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
@@ -21,21 +21,24 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Say One Thing", DailyParams(audio_out_enabled=True))
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Say One Thing", DailyParams(audio_out_enabled=True)
|
||||
)
|
||||
|
||||
tts = CartesiaHttpTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -46,11 +49,11 @@ async def main(room_url):
|
||||
# participant joins.
|
||||
@transport.event_handler("on_participant_joined")
|
||||
async def on_new_participant_joined(transport, participant):
|
||||
participant_name = participant["info"]["userName"] or ''
|
||||
participant_name = participant["info"]["userName"] or ""
|
||||
await task.queue_frames([TextFrame(f"Hello there, {participant_name}!"), EndFrame()])
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -9,17 +9,18 @@ import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import EndFrame, TextFrame
|
||||
from pipecat.frames.frames import TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.audio import LocalAudioTransport
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -30,10 +31,9 @@ async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = LocalAudioTransport(TransportParams(audio_out_enabled=True))
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
pipeline = Pipeline([tts, transport.output()])
|
||||
@@ -42,7 +42,7 @@ async def main():
|
||||
|
||||
async def say_something():
|
||||
await asyncio.sleep(1)
|
||||
await task.queue_frames([TextFrame("Hello there!"), EndFrame()])
|
||||
await task.queue_frame(TextFrame("Hello there!"))
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
|
||||
108
examples/foundational/01b-livekit-audio.py
Normal file
108
examples/foundational/01b-livekit-audio.py
Normal file
@@ -0,0 +1,108 @@
|
||||
import argparse
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from livekit import api # pip install livekit-api
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.services.livekit import LiveKitParams, LiveKitTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
def generate_token(room_name: str, participant_name: str, api_key: str, api_secret: str) -> str:
|
||||
token = api.AccessToken(api_key, api_secret)
|
||||
token.with_identity(participant_name).with_name(participant_name).with_grants(
|
||||
api.VideoGrants(
|
||||
room_join=True,
|
||||
room=room_name,
|
||||
)
|
||||
)
|
||||
|
||||
return token.to_jwt()
|
||||
|
||||
|
||||
async def configure_livekit():
|
||||
parser = argparse.ArgumentParser(description="LiveKit AI SDK Bot Sample")
|
||||
parser.add_argument(
|
||||
"-r", "--room", type=str, required=False, help="Name of the LiveKit room to join"
|
||||
)
|
||||
parser.add_argument("-u", "--url", type=str, required=False, help="URL of the LiveKit server")
|
||||
|
||||
args, unknown = parser.parse_known_args()
|
||||
|
||||
room_name = args.room or os.getenv("LIVEKIT_ROOM_NAME")
|
||||
url = args.url or os.getenv("LIVEKIT_URL")
|
||||
api_key = os.getenv("LIVEKIT_API_KEY")
|
||||
api_secret = os.getenv("LIVEKIT_API_SECRET")
|
||||
|
||||
if not room_name:
|
||||
raise Exception(
|
||||
"No LiveKit room specified. Use the -r/--room option from the command line, or set LIVEKIT_ROOM_NAME in your environment."
|
||||
)
|
||||
|
||||
if not url:
|
||||
raise Exception(
|
||||
"No LiveKit server URL specified. Use the -u/--url option from the command line, or set LIVEKIT_URL in your environment."
|
||||
)
|
||||
|
||||
if not api_key or not api_secret:
|
||||
raise Exception(
|
||||
"LIVEKIT_API_KEY and LIVEKIT_API_SECRET must be set in environment variables."
|
||||
)
|
||||
|
||||
token = generate_token(room_name, "Say One Thing", api_key, api_secret)
|
||||
|
||||
user_token = generate_token(room_name, "User", api_key, api_secret)
|
||||
logger.info(f"User token: {user_token}")
|
||||
|
||||
return (url, token, room_name)
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(url, token, room_name) = await configure_livekit()
|
||||
|
||||
transport = LiveKitTransport(
|
||||
url=url,
|
||||
token=token,
|
||||
room_name=room_name,
|
||||
params=LiveKitParams(audio_out_enabled=True, audio_out_sample_rate=16000),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(Pipeline([tts, transport.output()]))
|
||||
|
||||
# Register an event handler so we can play the audio when the
|
||||
# participant joins.
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant_id):
|
||||
await asyncio.sleep(1)
|
||||
await task.queue_frame(
|
||||
TextFrame(
|
||||
"Hello there! How are you doing today? Would you like to talk about the weather?"
|
||||
)
|
||||
)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -13,7 +13,7 @@ from pipecat.frames.frames import EndFrame, LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
@@ -22,35 +22,34 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
"Say One Thing From an LLM",
|
||||
DailyParams(audio_out_enabled=True))
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Say One Thing From an LLM", DailyParams(audio_out_enabled=True)
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
tts = CartesiaHttpTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are an LLM in a WebRTC session, and this is a 'hello world' demo. Say hello to the world.",
|
||||
}]
|
||||
}
|
||||
]
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
@@ -64,5 +63,4 @@ async def main(room_url):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -21,29 +21,26 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
"Show a still frame image",
|
||||
DailyParams(
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=1024
|
||||
)
|
||||
DailyParams(camera_out_enabled=True, camera_out_width=1024, camera_out_height=1024),
|
||||
)
|
||||
|
||||
imagegen = FalImageGenService(
|
||||
params=FalImageGenService.InputParams(
|
||||
image_size="square_hd"
|
||||
),
|
||||
params=FalImageGenService.InputParams(image_size="square_hd"),
|
||||
aiohttp_session=session,
|
||||
key=os.getenv("FAL_KEY"),
|
||||
)
|
||||
@@ -64,5 +61,4 @@ async def main(room_url):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -22,6 +22,7 @@ from pipecat.transports.local.tk import TkLocalTransport
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -35,15 +36,11 @@ async def main():
|
||||
|
||||
transport = TkLocalTransport(
|
||||
tk_root,
|
||||
TransportParams(
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=1024))
|
||||
TransportParams(camera_out_enabled=True, camera_out_width=1024, camera_out_height=1024),
|
||||
)
|
||||
|
||||
imagegen = FalImageGenService(
|
||||
params=FalImageGenService.InputParams(
|
||||
image_size="square_hd"
|
||||
),
|
||||
params=FalImageGenService.InputParams(image_size="square_hd"),
|
||||
aiohttp_session=session,
|
||||
key=os.getenv("FAL_KEY"),
|
||||
)
|
||||
@@ -56,7 +53,7 @@ async def main():
|
||||
runner = PipelineRunner()
|
||||
|
||||
async def run_tk():
|
||||
while runner.is_active():
|
||||
while not task.has_finished():
|
||||
tk_root.update()
|
||||
tk_root.update_idletasks()
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
@@ -4,6 +4,10 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
#
|
||||
# This example broken on latest pipecat and needs updating.
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
@@ -24,14 +28,17 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(room_url, None, "Static And Dynamic Speech")
|
||||
|
||||
meeting = TransportServiceOutput(transport, mic_enabled=True)
|
||||
@@ -52,8 +59,7 @@ async def main(room_url: str):
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
)
|
||||
|
||||
messages = [{"role": "system",
|
||||
"content": "tell the user a joke about llamas"}]
|
||||
messages = [{"role": "system", "content": "tell the user a joke about llamas"}]
|
||||
|
||||
# Start a task to run the LLM to create a joke, and convert the LLM
|
||||
# output to audio frames. This task will run in parallel with generating
|
||||
@@ -71,8 +77,7 @@ async def main(room_url: str):
|
||||
]
|
||||
)
|
||||
|
||||
merge_pipeline = SequentialMergePipeline(
|
||||
[simple_tts_pipeline, llm_pipeline])
|
||||
merge_pipeline = SequentialMergePipeline([simple_tts_pipeline, llm_pipeline])
|
||||
|
||||
await asyncio.gather(
|
||||
transport.run(merge_pipeline),
|
||||
@@ -82,5 +87,4 @@ async def main(room_url: str):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -13,23 +13,19 @@ from dataclasses import dataclass
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
AppFrame,
|
||||
EndFrame,
|
||||
Frame,
|
||||
ImageRawFrame,
|
||||
LLMFullResponseStartFrame,
|
||||
LLMMessagesFrame,
|
||||
TextFrame
|
||||
TextFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.pipeline.parallel_task import ParallelTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.aggregators.gated import GatedAggregator
|
||||
from pipecat.processors.aggregators.llm_response import LLMFullResponseAggregator
|
||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
@@ -38,6 +34,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -73,8 +70,10 @@ class MonthPrepender(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
@@ -83,48 +82,46 @@ async def main(room_url):
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=1024
|
||||
)
|
||||
camera_out_height=1024,
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
)
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
tts = CartesiaHttpTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
imagegen = FalImageGenService(
|
||||
params=FalImageGenService.InputParams(
|
||||
image_size="square_hd"
|
||||
),
|
||||
params=FalImageGenService.InputParams(image_size="square_hd"),
|
||||
aiohttp_session=session,
|
||||
key=os.getenv("FAL_KEY"),
|
||||
)
|
||||
|
||||
gated_aggregator = GatedAggregator(
|
||||
gate_open_fn=lambda frame: isinstance(frame, ImageRawFrame),
|
||||
gate_close_fn=lambda frame: isinstance(frame, LLMFullResponseStartFrame),
|
||||
start_open=False
|
||||
)
|
||||
|
||||
sentence_aggregator = SentenceAggregator()
|
||||
month_prepender = MonthPrepender()
|
||||
llm_full_response_aggregator = LLMFullResponseAggregator()
|
||||
|
||||
pipeline = Pipeline([
|
||||
llm, # LLM
|
||||
sentence_aggregator, # Aggregates LLM output into full sentences
|
||||
ParallelTask( # Run pipelines in parallel aggregating the result
|
||||
[month_prepender, tts], # Create "Month: sentence" and output audio
|
||||
[llm_full_response_aggregator, imagegen] # Aggregate full LLM response
|
||||
),
|
||||
gated_aggregator, # Queues everything until an image is available
|
||||
transport.output() # Transport output
|
||||
])
|
||||
# With `SyncParallelPipeline` we synchronize audio and images by pushing
|
||||
# them basically in order (e.g. I1 A1 A1 A1 I2 A2 A2 A2 A2 I3 A3). To do
|
||||
# that, each pipeline runs concurrently and `SyncParallelPipeline` will
|
||||
# wait for the input frame to be processed.
|
||||
#
|
||||
# Note that `SyncParallelPipeline` requires the last processor in each
|
||||
# of the pipelines to be synchronous. In this case, we use
|
||||
# `CartesiaHttpTTSService` and `FalImageGenService` which make HTTP
|
||||
# requests and wait for the response.
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
llm, # LLM
|
||||
sentence_aggregator, # Aggregates LLM output into full sentences
|
||||
SyncParallelPipeline( # Run pipelines in parallel aggregating the result
|
||||
[month_prepender, tts], # Create "Month: sentence" and output audio
|
||||
[imagegen], # Generate image
|
||||
),
|
||||
transport.output(), # Transport output
|
||||
]
|
||||
)
|
||||
|
||||
frames = []
|
||||
for month in [
|
||||
@@ -150,8 +147,6 @@ async def main(room_url):
|
||||
frames.append(MonthFrame(month=month))
|
||||
frames.append(LLMMessagesFrame(messages))
|
||||
|
||||
frames.append(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
@@ -162,5 +157,4 @@ async def main(room_url):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -11,18 +11,25 @@ import sys
|
||||
|
||||
import tkinter as tk
|
||||
|
||||
from pipecat.frames.frames import AudioRawFrame, Frame, URLImageRawFrame, LLMMessagesFrame, TextFrame
|
||||
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
OutputAudioRawFrame,
|
||||
TTSAudioRawFrame,
|
||||
URLImageRawFrame,
|
||||
LLMMessagesFrame,
|
||||
TextFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import LLMFullResponseAggregator
|
||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.tk import TkLocalTransport
|
||||
from pipecat.transports.local.tk import TkLocalTransport, TkOutputTransport
|
||||
|
||||
from loguru import logger
|
||||
|
||||
@@ -42,7 +49,12 @@ async def main():
|
||||
runner = PipelineRunner()
|
||||
|
||||
async def get_month_data(month):
|
||||
messages = [{"role": "system", "content": f"Describe a nature photograph suitable for use in a calendar, for the month of {month}. Include only the image description with no preamble. Limit the description to one sentence, please.", }]
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": f"Describe a nature photograph suitable for use in a calendar, for the month of {month}. Include only the image description with no preamble. Limit the description to one sentence, please.",
|
||||
}
|
||||
]
|
||||
|
||||
class ImageDescription(FrameProcessor):
|
||||
def __init__(self):
|
||||
@@ -60,14 +72,16 @@ async def main():
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.audio = bytearray()
|
||||
self.frame = None
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, AudioRawFrame):
|
||||
if isinstance(frame, TTSAudioRawFrame):
|
||||
self.audio.extend(frame.audio)
|
||||
self.frame = AudioRawFrame(
|
||||
bytes(self.audio), frame.sample_rate, frame.num_channels)
|
||||
self.frame = OutputAudioRawFrame(
|
||||
bytes(self.audio), frame.sample_rate, frame.num_channels
|
||||
)
|
||||
|
||||
class ImageGrabber(FrameProcessor):
|
||||
def __init__(self):
|
||||
@@ -80,23 +94,20 @@ async def main():
|
||||
if isinstance(frame, URLImageRawFrame):
|
||||
self.frame = frame
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"))
|
||||
tts = CartesiaHttpTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
imagegen = FalImageGenService(
|
||||
params=FalImageGenService.InputParams(
|
||||
image_size="square_hd"
|
||||
),
|
||||
params=FalImageGenService.InputParams(image_size="square_hd"),
|
||||
aiohttp_session=session,
|
||||
key=os.getenv("FAL_KEY"))
|
||||
key=os.getenv("FAL_KEY"),
|
||||
)
|
||||
|
||||
aggregator = LLMFullResponseAggregator()
|
||||
sentence_aggregator = SentenceAggregator()
|
||||
|
||||
description = ImageDescription()
|
||||
|
||||
@@ -104,13 +115,27 @@ async def main():
|
||||
|
||||
image_grabber = ImageGrabber()
|
||||
|
||||
pipeline = Pipeline([
|
||||
llm,
|
||||
aggregator,
|
||||
description,
|
||||
ParallelPipeline([tts, audio_grabber],
|
||||
[imagegen, image_grabber])
|
||||
])
|
||||
# With `SyncParallelPipeline` we synchronize audio and images by
|
||||
# pushing them basically in order (e.g. I1 A1 A1 A1 I2 A2 A2 A2 A2
|
||||
# I3 A3). To do that, each pipeline runs concurrently and
|
||||
# `SyncParallelPipeline` will wait for the input frame to be
|
||||
# processed.
|
||||
#
|
||||
# Note that `SyncParallelPipeline` requires the last processor in
|
||||
# each of the pipelines to be synchronous. In this case, we use
|
||||
# `CartesiaHttpTTSService` and `FalImageGenService` which make HTTP
|
||||
# requests and wait for the response.
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
llm, # LLM
|
||||
sentence_aggregator, # Aggregates LLM output into full sentences
|
||||
description, # Store sentence
|
||||
SyncParallelPipeline(
|
||||
[tts, audio_grabber], # Generate and store audio for the given sentence
|
||||
[imagegen, image_grabber], # Generate and storeimage for the given sentence
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
await task.queue_frame(LLMMessagesFrame(messages))
|
||||
@@ -131,20 +156,19 @@ async def main():
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=1024))
|
||||
camera_out_height=1024,
|
||||
),
|
||||
)
|
||||
|
||||
pipeline = Pipeline([transport.output()])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
# We only specify 5 months as we create tasks all at once and we might
|
||||
# get rate limited otherwise.
|
||||
# We only specify a few months as we create tasks all at once and we
|
||||
# might get rate limited otherwise.
|
||||
months: list[str] = [
|
||||
"January",
|
||||
"February",
|
||||
# "March",
|
||||
# "April",
|
||||
# "May",
|
||||
]
|
||||
|
||||
# We create one task per month. This will be executed concurrently.
|
||||
|
||||
@@ -9,16 +9,22 @@ import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.frames.frames import Frame, LLMMessagesFrame, MetricsFrame
|
||||
from pipecat.metrics.metrics import (
|
||||
TTFBMetricsData,
|
||||
ProcessingMetricsData,
|
||||
LLMUsageMetricsData,
|
||||
TTSUsageMetricsData,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.logger import FrameLogger
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -28,14 +34,37 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
class MetricsLogger(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
if isinstance(frame, MetricsFrame):
|
||||
for d in frame.data:
|
||||
if isinstance(d, TTFBMetricsData):
|
||||
print(f"!!! MetricsFrame: {frame}, ttfb: {d.value}")
|
||||
elif isinstance(d, ProcessingMetricsData):
|
||||
print(f"!!! MetricsFrame: {frame}, processing: {d.value}")
|
||||
elif isinstance(d, LLMUsageMetricsData):
|
||||
tokens = d.value
|
||||
print(
|
||||
f"!!! MetricsFrame: {frame}, tokens: {
|
||||
tokens.prompt_tokens}, characters: {
|
||||
tokens.completion_tokens}"
|
||||
)
|
||||
elif isinstance(d, TTSUsageMetricsData):
|
||||
print(f"!!! MetricsFrame: {frame}, characters: {d.value}")
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -44,23 +73,18 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
fl = FrameLogger("!!! after LLM", "red")
|
||||
fltts = FrameLogger("@@@ out of tts", "green")
|
||||
flend = FrameLogger("### out of the end", "magenta")
|
||||
ml = MetricsLogger()
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -71,17 +95,17 @@ async def main(room_url: str, token):
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(),
|
||||
tma_in,
|
||||
llm,
|
||||
fl,
|
||||
tts,
|
||||
fltts,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
flend
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
tma_in,
|
||||
llm,
|
||||
tts,
|
||||
ml,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@@ -89,8 +113,7 @@ async def main(room_url: str, token):
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -99,5 +122,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -11,7 +11,7 @@ import sys
|
||||
|
||||
from PIL import Image
|
||||
|
||||
from pipecat.frames.frames import ImageRawFrame, Frame, SystemFrame, TextFrame
|
||||
from pipecat.frames.frames import Frame, OutputImageRawFrame, SystemFrame, TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
@@ -20,8 +20,8 @@ from pipecat.processors.aggregators.llm_response import (
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.transports.services.daily import DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
@@ -31,6 +31,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -51,16 +52,30 @@ class ImageSyncAggregator(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if not isinstance(frame, SystemFrame):
|
||||
await self.push_frame(ImageRawFrame(image=self._speaking_image_bytes, size=(1024, 1024), format=self._speaking_image_format))
|
||||
if not isinstance(frame, SystemFrame) and direction == FrameDirection.DOWNSTREAM:
|
||||
await self.push_frame(
|
||||
OutputImageRawFrame(
|
||||
image=self._speaking_image_bytes,
|
||||
size=(1024, 1024),
|
||||
format=self._speaking_image_format,
|
||||
)
|
||||
)
|
||||
await self.push_frame(frame)
|
||||
await self.push_frame(ImageRawFrame(image=self._waiting_image_bytes, size=(1024, 1024), format=self._waiting_image_format))
|
||||
await self.push_frame(
|
||||
OutputImageRawFrame(
|
||||
image=self._waiting_image_bytes,
|
||||
size=(1024, 1024),
|
||||
format=self._waiting_image_format,
|
||||
)
|
||||
)
|
||||
else:
|
||||
await self.push_frame(frame)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -73,18 +88,15 @@ async def main(room_url: str, token):
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaHttpTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -101,21 +113,23 @@ async def main(room_url: str, token):
|
||||
os.path.join(os.path.dirname(__file__), "assets", "waiting.png"),
|
||||
)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(),
|
||||
image_sync_aggregator,
|
||||
tma_in,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
tma_out
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
image_sync_aggregator,
|
||||
tma_in,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
participant_name = participant["info"]["userName"] or ''
|
||||
participant_name = participant["info"]["userName"] or ""
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([TextFrame(f"Hi there {participant_name}!")])
|
||||
|
||||
@@ -125,5 +139,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -14,8 +14,10 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -25,14 +27,17 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -41,19 +46,16 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -65,27 +67,32 @@ async def main(room_url: str, token):
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
))
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -94,5 +101,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -14,8 +14,10 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -25,14 +27,17 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -41,19 +46,18 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = AnthropicLLMService(
|
||||
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
||||
model="claude-3-opus-20240229")
|
||||
api_key=os.getenv("ANTHROPIC_API_KEY"), model="claude-3-opus-20240229"
|
||||
)
|
||||
|
||||
# todo: think more about how to handle system prompts in a more general way. OpenAI,
|
||||
# Google, and Anthropic all have slightly different approaches to providing a system
|
||||
@@ -68,14 +72,16 @@ async def main(room_url: str, token):
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@@ -91,5 +97,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -15,9 +15,11 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.frameworks.langchain import LangchainProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
@@ -32,6 +34,7 @@ from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
@@ -47,8 +50,10 @@ def get_session_history(session_id: str) -> BaseChatMessageHistory:
|
||||
return message_store[session_id]
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -61,27 +66,29 @@ async def main(room_url: str, token):
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
prompt = ChatPromptTemplate.from_messages(
|
||||
[
|
||||
("system",
|
||||
"Be nice and helpful. Answer very briefly and without special characters like `#` or `*`. "
|
||||
"Your response will be synthesized to voice and those characters will create unnatural sounds.",
|
||||
),
|
||||
(
|
||||
"system",
|
||||
"Be nice and helpful. Answer very briefly and without special characters like `#` or `*`. "
|
||||
"Your response will be synthesized to voice and those characters will create unnatural sounds.",
|
||||
),
|
||||
MessagesPlaceholder("chat_history"),
|
||||
("human", "{input}"),
|
||||
])
|
||||
]
|
||||
)
|
||||
chain = prompt | ChatOpenAI(model="gpt-4o", temperature=0.7)
|
||||
history_chain = RunnableWithMessageHistory(
|
||||
chain,
|
||||
get_session_history,
|
||||
history_messages_key="chat_history",
|
||||
input_messages_key="input")
|
||||
input_messages_key="input",
|
||||
)
|
||||
lc = LangchainProcessor(history_chain)
|
||||
|
||||
tma_in = LLMUserResponseAggregator()
|
||||
@@ -89,12 +96,12 @@ async def main(room_url: str, token):
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
lc, # Langchain
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
lc, # Langchain
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
@@ -108,11 +115,7 @@ async def main(room_url: str, token):
|
||||
# the `LLMMessagesFrame` will be picked up by the LangchainProcessor using
|
||||
# only the content of the last message to inject it in the prompt defined
|
||||
# above. So no role is required here.
|
||||
messages = [(
|
||||
{
|
||||
"content": "Please briefly introduce yourself to the user."
|
||||
}
|
||||
)]
|
||||
messages = [({"content": "Please briefly introduce yourself to the user."})]
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -121,5 +124,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -5,34 +5,37 @@
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -41,21 +44,15 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True
|
||||
)
|
||||
vad_audio_passthrough=True,
|
||||
),
|
||||
)
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = DeepgramTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("DEEPGRAM_API_KEY"),
|
||||
voice="aura-helios-en"
|
||||
)
|
||||
tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY"), voice="aura-helios-en")
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -67,15 +64,17 @@ async def main(room_url: str, token):
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@@ -83,8 +82,7 @@ async def main(room_url: str, token):
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -93,5 +91,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -1,94 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_sample_rate=44100,
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="a0e99841-438c-4a64-b679-ae501e7d6091", # Barbershop Man
|
||||
sample_rate=44100,
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
tma_out, # Goes before the transport because cartesia has word-level timestamps!
|
||||
transport.output(), # Transport bot output
|
||||
])
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
102
examples/foundational/07d-interruptible-elevenlabs.py
Normal file
102
examples/foundational/07d-interruptible-elevenlabs.py
Normal file
@@ -0,0 +1,102 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -4,6 +4,7 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
@@ -13,7 +14,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.playht import PlayHTTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
@@ -24,70 +27,72 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=16000,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=16000,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
tts = PlayHTTTSService(
|
||||
user_id=os.getenv("PLAYHT_USER_ID"),
|
||||
api_key=os.getenv("PLAYHT_API_KEY"),
|
||||
voice_url="s3://voice-cloning-zero-shot/801a663f-efd0-4254-98d0-5c175514c3e8/jennifer/manifest.json",
|
||||
)
|
||||
tts = PlayHTTTSService(
|
||||
user_id=os.getenv("PLAYHT_USER_ID"),
|
||||
api_key=os.getenv("PLAYHT_API_KEY"),
|
||||
voice_url="s3://voice-cloning-zero-shot/801a663f-efd0-4254-98d0-5c175514c3e8/jennifer/manifest.json",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
@@ -13,7 +14,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.azure import AzureLLMService, AzureSTTService, AzureTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -24,77 +27,81 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=16000,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True,
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=16000,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True,
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
stt = AzureSTTService(
|
||||
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
|
||||
region=os.getenv("AZURE_SPEECH_REGION"),
|
||||
)
|
||||
stt = AzureSTTService(
|
||||
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
|
||||
region=os.getenv("AZURE_SPEECH_REGION"),
|
||||
)
|
||||
|
||||
tts = AzureTTSService(
|
||||
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
|
||||
region=os.getenv("AZURE_SPEECH_REGION"),
|
||||
)
|
||||
tts = AzureTTSService(
|
||||
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
|
||||
region=os.getenv("AZURE_SPEECH_REGION"),
|
||||
)
|
||||
|
||||
llm = AzureLLMService(
|
||||
api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
|
||||
endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
|
||||
model=os.getenv("AZURE_CHATGPT_MODEL"),
|
||||
)
|
||||
llm = AzureLLMService(
|
||||
api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
|
||||
endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
|
||||
model=os.getenv("AZURE_CHATGPT_MODEL"),
|
||||
)
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
@@ -13,7 +14,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.openai import OpenAITTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
@@ -24,69 +27,68 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=24000,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=24000,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
tts = OpenAITTSService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
voice="alloy"
|
||||
)
|
||||
tts = OpenAITTSService(api_key=os.getenv("OPENAI_API_KEY"), voice="alloy")
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -17,7 +17,7 @@ from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openpipe import OpenPipeLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -28,14 +28,17 @@ from loguru import logger
|
||||
import time
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -44,14 +47,13 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
timestamp = int(time.time())
|
||||
@@ -59,9 +61,7 @@ async def main(room_url: str, token):
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
openpipe_api_key=os.getenv("OPENPIPE_API_KEY"),
|
||||
model="gpt-4o",
|
||||
tags={
|
||||
"conversation_id": f"pipecat-{timestamp}"
|
||||
}
|
||||
tags={"conversation_id": f"pipecat-{timestamp}"},
|
||||
)
|
||||
|
||||
messages = [
|
||||
@@ -73,14 +73,16 @@ async def main(room_url: str, token):
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@@ -88,8 +90,7 @@ async def main(room_url: str, token):
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -98,5 +99,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -14,7 +14,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.xtts import XTTSService
|
||||
@@ -26,14 +28,17 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -43,19 +48,17 @@ async def main(room_url: str, token):
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
tts = XTTSService(
|
||||
aiohttp_session=session,
|
||||
voice_id="Claribel Dervla",
|
||||
language="en",
|
||||
base_url="http://localhost:8000"
|
||||
base_url="http://localhost:8000",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -67,14 +70,16 @@ async def main(room_url: str, token):
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@@ -82,8 +87,7 @@ async def main(room_url: str, token):
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -92,5 +96,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -14,11 +14,12 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.gladia import GladiaSTTService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.xtts import XTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
@@ -27,14 +28,17 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -44,22 +48,19 @@ async def main(room_url: str, token):
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True,
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
stt = GladiaSTTService(
|
||||
api_key=os.getenv("GLADIA_API_KEY"),
|
||||
)
|
||||
|
||||
tts = DeepgramTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("DEEPGRAM_API_KEY"),
|
||||
voice="aura-helios-en"
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -71,15 +72,17 @@ async def main(room_url: str, token):
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@@ -87,8 +90,7 @@ async def main(room_url: str, token):
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -97,5 +99,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
94
examples/foundational/07k-interruptible-lmnt.py
Normal file
94
examples/foundational/07k-interruptible-lmnt.py
Normal file
@@ -0,0 +1,94 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.lmnt import LmntTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=24000,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = LmntTTSService(api_key=os.getenv("LMNT_API_KEY"), voice_id="morgan")
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
107
examples/foundational/07l-interruptible-together.py
Normal file
107
examples/foundational/07l-interruptible-together.py
Normal file
@@ -0,0 +1,107 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.together import TogetherLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = TogetherLLMService(
|
||||
api_key=os.getenv("TOGETHER_API_KEY"),
|
||||
model=os.getenv("TOGETHER_MODEL"),
|
||||
params=TogetherLLMService.InputParams(
|
||||
temperature=1.0,
|
||||
top_p=0.9,
|
||||
top_k=40,
|
||||
extra={
|
||||
"frequency_penalty": 2.0,
|
||||
"presence_penalty": 0.0,
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
102
examples/foundational/07m-interruptible-aws.py
Normal file
102
examples/foundational/07m-interruptible-aws.py
Normal file
@@ -0,0 +1,102 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.aws import AWSTTSService
|
||||
from pipecat.services.deepgram import DeepgramSTTService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=16000,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True,
|
||||
),
|
||||
)
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = AWSTTSService(
|
||||
api_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
|
||||
aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
|
||||
region=os.getenv("AWS_REGION"),
|
||||
voice_id="Amy",
|
||||
params=AWSTTSService.InputParams(engine="neural", language="en-GB", rate="1.05"),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
100
examples/foundational/07n-interruptible-google.py
Normal file
100
examples/foundational/07n-interruptible-google.py
Normal file
@@ -0,0 +1,100 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.deepgram import DeepgramSTTService
|
||||
from pipecat.services.google import GoogleTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=24000,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True,
|
||||
),
|
||||
)
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = GoogleTTSService(
|
||||
credentials=os.getenv("GOOGLE_CREDENTIALS"),
|
||||
voice_id="en-US-Neural2-J",
|
||||
params=GoogleTTSService.InputParams(language="en-US", rate="1.05"),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -3,18 +3,19 @@ import aiohttp
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from pipecat.pipeline.aggregators import SentenceAggregator
|
||||
from pipecat.processors.aggregators import SentenceAggregator
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
|
||||
from pipecat.transports.daily_transport import DailyTransport
|
||||
from pipecat.services.azure_ai_services import AzureLLMService, AzureTTSService
|
||||
from pipecat.services.elevenlabs_ai_services import ElevenLabsTTSService
|
||||
from pipecat.services.fal_ai_services import FalImageGenService
|
||||
from pipecat.pipeline.frames import AudioFrame, EndFrame, ImageFrame, LLMMessagesFrame, TextFrame
|
||||
from pipecat.transports.services.daily import DailyTransport
|
||||
from pipecat.services.azure import AzureLLMService, AzureTTSService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.frames.frames import AudioFrame, EndFrame, ImageFrame, LLMMessagesFrame, TextFrame
|
||||
|
||||
from runner import configure
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s")
|
||||
@@ -22,8 +23,10 @@ logger = logging.getLogger("pipecat")
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
|
||||
async def main(room_url: str):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
@@ -51,9 +54,7 @@ async def main(room_url: str):
|
||||
voice_id="jBpfuIE2acCO8z3wKNLl",
|
||||
)
|
||||
dalle = FalImageGenService(
|
||||
params=FalImageGenService.InputParams(
|
||||
image_size="1024x1024"
|
||||
),
|
||||
params=FalImageGenService.InputParams(image_size="1024x1024"),
|
||||
aiohttp_session=session,
|
||||
key=os.getenv("FAL_KEY"),
|
||||
)
|
||||
@@ -73,13 +74,11 @@ async def main(room_url: str):
|
||||
|
||||
async def get_text_and_audio(messages) -> Tuple[str, bytearray]:
|
||||
"""This function streams text from the LLM and uses the TTS service to convert
|
||||
that text to speech as it's received. """
|
||||
that text to speech as it's received."""
|
||||
source_queue = asyncio.Queue()
|
||||
sink_queue = asyncio.Queue()
|
||||
sentence_aggregator = SentenceAggregator()
|
||||
pipeline = Pipeline(
|
||||
[llm, sentence_aggregator, tts1], source_queue, sink_queue
|
||||
)
|
||||
pipeline = Pipeline([llm, sentence_aggregator, tts1], source_queue, sink_queue)
|
||||
|
||||
await source_queue.put(LLMMessagesFrame(messages))
|
||||
await source_queue.put(EndFrame())
|
||||
@@ -144,5 +143,4 @@ async def main(room_url: str):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -4,12 +4,21 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
InputAudioRawFrame,
|
||||
InputImageRawFrame,
|
||||
OutputAudioRawFrame,
|
||||
OutputImageRawFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.transports.services.daily import DailyTransport, DailyParams
|
||||
|
||||
from runner import configure
|
||||
@@ -17,38 +26,63 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url, token):
|
||||
transport = DailyTransport(
|
||||
room_url, token, "Test",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_is_live=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720
|
||||
class MirrorProcessor(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, InputAudioRawFrame):
|
||||
await self.push_frame(
|
||||
OutputAudioRawFrame(
|
||||
audio=frame.audio,
|
||||
sample_rate=frame.sample_rate,
|
||||
num_channels=frame.num_channels,
|
||||
)
|
||||
)
|
||||
elif isinstance(frame, InputImageRawFrame):
|
||||
await self.push_frame(
|
||||
OutputImageRawFrame(image=frame.image, size=frame.size, format=frame.format)
|
||||
)
|
||||
else:
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Test",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_is_live=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720,
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_video(participant["id"])
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_video(participant["id"])
|
||||
|
||||
pipeline = Pipeline([transport.input(), transport.output()])
|
||||
pipeline = Pipeline([transport.input(), MirrorProcessor(), transport.output()])
|
||||
|
||||
runner = PipelineRunner()
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
await runner.run(task)
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -4,14 +4,23 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
import tkinter as tk
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
InputAudioRawFrame,
|
||||
InputImageRawFrame,
|
||||
OutputAudioRawFrame,
|
||||
OutputImageRawFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.tk import TkLocalTransport
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
@@ -21,46 +30,73 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url, token):
|
||||
tk_root = tk.Tk()
|
||||
tk_root.title("Local Mirror")
|
||||
class MirrorProcessor(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
daily_transport = DailyTransport(room_url, token, "Test", DailyParams(audio_in_enabled=True))
|
||||
if isinstance(frame, InputAudioRawFrame):
|
||||
await self.push_frame(
|
||||
OutputAudioRawFrame(
|
||||
audio=frame.audio,
|
||||
sample_rate=frame.sample_rate,
|
||||
num_channels=frame.num_channels,
|
||||
)
|
||||
)
|
||||
elif isinstance(frame, InputImageRawFrame):
|
||||
await self.push_frame(
|
||||
OutputImageRawFrame(image=frame.image, size=frame.size, format=frame.format)
|
||||
)
|
||||
else:
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
tk_transport = TkLocalTransport(
|
||||
tk_root,
|
||||
TransportParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_is_live=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720))
|
||||
|
||||
@daily_transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_video(participant["id"])
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
pipeline = Pipeline([daily_transport.input(), tk_transport.output()])
|
||||
tk_root = tk.Tk()
|
||||
tk_root.title("Local Mirror")
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
daily_transport = DailyTransport(
|
||||
room_url, token, "Test", DailyParams(audio_in_enabled=True)
|
||||
)
|
||||
|
||||
async def run_tk():
|
||||
while not task.has_finished():
|
||||
tk_root.update()
|
||||
tk_root.update_idletasks()
|
||||
await asyncio.sleep(0.1)
|
||||
tk_transport = TkLocalTransport(
|
||||
tk_root,
|
||||
TransportParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_is_live=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720,
|
||||
),
|
||||
)
|
||||
|
||||
runner = PipelineRunner()
|
||||
@daily_transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_video(participant["id"])
|
||||
|
||||
await asyncio.gather(runner.run(task), run_tk())
|
||||
pipeline = Pipeline([daily_transport.input(), MirrorProcessor(), tk_transport.output()])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
async def run_tk():
|
||||
while not task.has_finished():
|
||||
tk_root.update()
|
||||
tk_root.update_idletasks()
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await asyncio.gather(runner.run(task), run_tk())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -14,8 +14,10 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -25,15 +27,17 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -42,19 +46,16 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -67,15 +68,17 @@ async def main(room_url: str, token):
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
hey_robot_filter, # Filter out speech not directed at the robot
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
hey_robot_filter, # Filter out speech not directed at the robot
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@@ -90,5 +93,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -12,9 +12,9 @@ import wave
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
AudioRawFrame,
|
||||
LLMFullResponseEndFrame,
|
||||
LLMMessagesFrame,
|
||||
OutputAudioRawFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -25,7 +25,7 @@ from pipecat.processors.aggregators.llm_response import (
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.logger import FrameLogger
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -35,6 +35,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -53,12 +54,12 @@ for file in sound_files:
|
||||
filename = os.path.splitext(os.path.basename(full_path))[0]
|
||||
# Open the image and convert it to bytes
|
||||
with wave.open(full_path) as audio_file:
|
||||
sounds[file] = AudioRawFrame(audio_file.readframes(-1),
|
||||
audio_file.getframerate(), audio_file.getnchannels())
|
||||
sounds[file] = OutputAudioRawFrame(
|
||||
audio_file.readframes(-1), audio_file.getframerate(), audio_file.getnchannels()
|
||||
)
|
||||
|
||||
|
||||
class OutboundSoundEffectWrapper(FrameProcessor):
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
@@ -71,7 +72,6 @@ class OutboundSoundEffectWrapper(FrameProcessor):
|
||||
|
||||
|
||||
class InboundSoundEffectWrapper(FrameProcessor):
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
@@ -83,8 +83,10 @@ class InboundSoundEffectWrapper(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -93,18 +95,15 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id="ErXwobaYiN019PkySvjV",
|
||||
tts = CartesiaHttpTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
messages = [
|
||||
@@ -121,18 +120,20 @@ async def main(room_url: str, token):
|
||||
fl = FrameLogger("LLM Out")
|
||||
fl2 = FrameLogger("Transcription In")
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(),
|
||||
tma_in,
|
||||
in_sound,
|
||||
fl2,
|
||||
llm,
|
||||
fl,
|
||||
tts,
|
||||
out_sound,
|
||||
transport.output(),
|
||||
tma_out
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
tma_in,
|
||||
in_sound,
|
||||
fl2,
|
||||
llm,
|
||||
fl,
|
||||
tts,
|
||||
out_sound,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
]
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
@@ -148,5 +149,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -16,7 +16,7 @@ from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.moondream import MoondreamService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -26,6 +26,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -33,7 +34,6 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class UserImageRequester(FrameProcessor):
|
||||
|
||||
def __init__(self, participant_id: str | None = None):
|
||||
super().__init__()
|
||||
self._participant_id = participant_id
|
||||
@@ -45,12 +45,16 @@ class UserImageRequester(FrameProcessor):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if self._participant_id and isinstance(frame, TextFrame):
|
||||
await self.push_frame(UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM)
|
||||
await self.push_frame(
|
||||
UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
|
||||
)
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -59,14 +63,8 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
user_response = UserResponseAggregator()
|
||||
@@ -78,10 +76,9 @@ async def main(room_url: str, token):
|
||||
# If you run into weird description, try with use_cpu=True
|
||||
moondream = MoondreamService()
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
@@ -91,15 +88,17 @@ async def main(room_url: str, token):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
image_requester.set_participant_id(participant["id"])
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(),
|
||||
user_response,
|
||||
image_requester,
|
||||
vision_aggregator,
|
||||
moondream,
|
||||
tts,
|
||||
transport.output()
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
user_response,
|
||||
image_requester,
|
||||
vision_aggregator,
|
||||
moondream,
|
||||
tts,
|
||||
transport.output(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@@ -107,6 +106,6 @@ async def main(room_url: str, token):
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -16,7 +16,7 @@ from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.google import GoogleLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -26,6 +26,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -33,7 +34,6 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class UserImageRequester(FrameProcessor):
|
||||
|
||||
def __init__(self, participant_id: str | None = None):
|
||||
super().__init__()
|
||||
self._participant_id = participant_id
|
||||
@@ -45,12 +45,16 @@ class UserImageRequester(FrameProcessor):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if self._participant_id and isinstance(frame, TextFrame):
|
||||
await self.push_frame(UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM)
|
||||
await self.push_frame(
|
||||
UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
|
||||
)
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -60,8 +64,8 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
user_response = UserResponseAggregator()
|
||||
@@ -71,13 +75,12 @@ async def main(room_url: str, token):
|
||||
vision_aggregator = VisionImageFrameAggregator()
|
||||
|
||||
google = GoogleLLMService(
|
||||
model="gemini-1.5-flash-latest",
|
||||
api_key=os.getenv("GOOGLE_API_KEY"))
|
||||
model="gemini-1.5-flash-latest", api_key=os.getenv("GOOGLE_API_KEY")
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
@@ -87,15 +90,17 @@ async def main(room_url: str, token):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
image_requester.set_participant_id(participant["id"])
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(),
|
||||
user_response,
|
||||
image_requester,
|
||||
vision_aggregator,
|
||||
google,
|
||||
tts,
|
||||
transport.output()
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
user_response,
|
||||
image_requester,
|
||||
vision_aggregator,
|
||||
google,
|
||||
tts,
|
||||
transport.output(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@@ -103,6 +108,6 @@ async def main(room_url: str, token):
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -16,7 +16,7 @@ from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -26,6 +26,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -33,7 +34,6 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class UserImageRequester(FrameProcessor):
|
||||
|
||||
def __init__(self, participant_id: str | None = None):
|
||||
super().__init__()
|
||||
self._participant_id = participant_id
|
||||
@@ -45,12 +45,16 @@ class UserImageRequester(FrameProcessor):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if self._participant_id and isinstance(frame, TextFrame):
|
||||
await self.push_frame(UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM)
|
||||
await self.push_frame(
|
||||
UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
|
||||
)
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -59,8 +63,8 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
user_response = UserResponseAggregator()
|
||||
@@ -69,15 +73,11 @@ async def main(room_url: str, token):
|
||||
|
||||
vision_aggregator = VisionImageFrameAggregator()
|
||||
|
||||
openai = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o"
|
||||
)
|
||||
openai = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
@@ -87,15 +87,17 @@ async def main(room_url: str, token):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
image_requester.set_participant_id(participant["id"])
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(),
|
||||
user_response,
|
||||
image_requester,
|
||||
vision_aggregator,
|
||||
openai,
|
||||
tts,
|
||||
transport.output()
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
user_response,
|
||||
image_requester,
|
||||
vision_aggregator,
|
||||
openai,
|
||||
tts,
|
||||
transport.output(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@@ -103,6 +105,6 @@ async def main(room_url: str, token):
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -16,7 +16,7 @@ from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -26,6 +26,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -33,7 +34,6 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class UserImageRequester(FrameProcessor):
|
||||
|
||||
def __init__(self, participant_id: str | None = None):
|
||||
super().__init__()
|
||||
self._participant_id = participant_id
|
||||
@@ -45,12 +45,16 @@ class UserImageRequester(FrameProcessor):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if self._participant_id and isinstance(frame, TextFrame):
|
||||
await self.push_frame(UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM)
|
||||
await self.push_frame(
|
||||
UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
|
||||
)
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -59,8 +63,8 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
user_response = UserResponseAggregator()
|
||||
@@ -69,15 +73,14 @@ async def main(room_url: str, token):
|
||||
|
||||
vision_aggregator = VisionImageFrameAggregator()
|
||||
|
||||
anthropic = AnthropicLLMService(
|
||||
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
||||
model="claude-3-sonnet-20240229"
|
||||
)
|
||||
anthropic = AnthropicLLMService(api_key=os.getenv("ANTHROPIC_API_KEY"))
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
params=CartesiaTTSService.InputParams(
|
||||
sample_rate=16000,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
@@ -87,15 +90,17 @@ async def main(room_url: str, token):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
image_requester.set_participant_id(participant["id"])
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(),
|
||||
user_response,
|
||||
image_requester,
|
||||
vision_aggregator,
|
||||
anthropic,
|
||||
tts,
|
||||
transport.output()
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
user_response,
|
||||
image_requester,
|
||||
vision_aggregator,
|
||||
anthropic,
|
||||
tts,
|
||||
transport.output(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@@ -103,6 +108,6 @@ async def main(room_url: str, token):
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
@@ -20,6 +21,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -27,7 +29,6 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class TranscriptionLogger(FrameProcessor):
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
@@ -35,23 +36,26 @@ class TranscriptionLogger(FrameProcessor):
|
||||
print(f"Transcription: {frame.text}")
|
||||
|
||||
|
||||
async def main(room_url: str):
|
||||
transport = DailyTransport(room_url, None, "Transcription bot",
|
||||
DailyParams(audio_in_enabled=True))
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
stt = WhisperSTTService()
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Transcription bot", DailyParams(audio_in_enabled=True)
|
||||
)
|
||||
|
||||
tl = TranscriptionLogger()
|
||||
stt = WhisperSTTService()
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
tl = TranscriptionLogger()
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
runner = PipelineRunner()
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
await runner.run(task)
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -19,6 +19,7 @@ from pipecat.transports.local.audio import LocalAudioTransport
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -26,7 +27,6 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class TranscriptionLogger(FrameProcessor):
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
@@ -21,6 +22,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -28,7 +30,6 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class TranscriptionLogger(FrameProcessor):
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
@@ -36,23 +37,26 @@ class TranscriptionLogger(FrameProcessor):
|
||||
print(f"Transcription: {frame.text}")
|
||||
|
||||
|
||||
async def main(room_url: str):
|
||||
transport = DailyTransport(room_url, None, "Transcription bot",
|
||||
DailyParams(audio_in_enabled=True))
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
stt = DeepgramSTTService(os.getenv("DEEPGRAM_API_KEY"))
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Transcription bot", DailyParams(audio_in_enabled=True)
|
||||
)
|
||||
|
||||
tl = TranscriptionLogger()
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
tl = TranscriptionLogger()
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
runner = PipelineRunner()
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
await runner.run(task)
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -5,47 +5,48 @@
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantContextAggregator,
|
||||
LLMUserContextAggregator,
|
||||
)
|
||||
from pipecat.processors.logger import FrameLogger
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMContext, OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def start_fetch_weather(llm):
|
||||
await llm.push_frame(TextFrame("Let me think."))
|
||||
async def start_fetch_weather(function_name, llm, context):
|
||||
# note: we can't push a frame to the LLM here. the bot
|
||||
# can interrupt itself and/or cause audio overlapping glitches.
|
||||
# possible question for Aleix and Chad about what the right way
|
||||
# to trigger speech is, now, with the new queues/async/sync refactors.
|
||||
await llm.push_frame(TextFrame("Let me check on that. "))
|
||||
logger.debug(f"Starting fetch_weather_from_api with function_name: {function_name}")
|
||||
|
||||
|
||||
async def fetch_weather_from_api(llm, args):
|
||||
return {"conditions": "nice", "temperature": "75"}
|
||||
async def fetch_weather_from_api(function_name, tool_call_id, args, llm, context, result_callback):
|
||||
await result_callback({"conditions": "nice", "temperature": "75"})
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -54,23 +55,19 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm.register_function(
|
||||
"get_current_weather",
|
||||
fetch_weather_from_api,
|
||||
start_callback=start_fetch_weather)
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
# Register a function_name of None to get all functions
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
||||
|
||||
fl_in = FrameLogger("Inner")
|
||||
fl_out = FrameLogger("Outer")
|
||||
@@ -90,17 +87,15 @@ async def main(room_url: str, token):
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"celsius",
|
||||
"fahrenheit"],
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the users location.",
|
||||
},
|
||||
},
|
||||
"required": [
|
||||
"location",
|
||||
"format"],
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
})]
|
||||
},
|
||||
)
|
||||
]
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -109,22 +104,24 @@ async def main(room_url: str, token):
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
tma_in = LLMUserContextAggregator(context)
|
||||
tma_out = LLMAssistantContextAggregator(context)
|
||||
pipeline = Pipeline([
|
||||
fl_in,
|
||||
transport.input(),
|
||||
tma_in,
|
||||
llm,
|
||||
fl_out,
|
||||
tts,
|
||||
transport.output(),
|
||||
tma_out
|
||||
])
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
# fl_in,
|
||||
transport.input(),
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
# fl_out,
|
||||
tts,
|
||||
transport.output(),
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@ transport.event_handler("on_first_participant_joined")
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
@@ -136,5 +133,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
@@ -14,10 +14,6 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantContextAggregator,
|
||||
LLMUserContextAggregator
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.filters.function_filter import FunctionFilter
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
@@ -32,6 +28,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -40,10 +37,14 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
current_voice = "News Lady"
|
||||
|
||||
|
||||
async def switch_voice(llm, args):
|
||||
async def switch_voice(function_name, tool_call_id, args, llm, context, result_callback):
|
||||
global current_voice
|
||||
current_voice = args["voice"]
|
||||
return {"voice": f"You are now using your {current_voice} voice. Your responses should now be as if you were a {current_voice}."}
|
||||
await result_callback(
|
||||
{
|
||||
"voice": f"You are now using your {current_voice} voice. Your responses should now be as if you were a {current_voice}."
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
async def news_lady_filter(frame) -> bool:
|
||||
@@ -58,8 +59,10 @@ async def barbershop_man_filter(frame) -> bool:
|
||||
return current_voice == "Barbershop Man"
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -68,8 +71,8 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
news_lady = CartesiaTTSService(
|
||||
@@ -87,9 +90,7 @@ async def main(room_url: str, token):
|
||||
voice_id="a0e99841-438c-4a64-b679-ae501e7d6091", # Barbershop Man
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
llm.register_function("switch_voice", switch_voice)
|
||||
|
||||
tools = [
|
||||
@@ -108,7 +109,9 @@ async def main(room_url: str, token):
|
||||
},
|
||||
"required": ["voice"],
|
||||
},
|
||||
})]
|
||||
},
|
||||
)
|
||||
]
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -117,21 +120,22 @@ async def main(room_url: str, token):
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
tma_in = LLMUserContextAggregator(context)
|
||||
tma_out = LLMAssistantContextAggregator(context)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
ParallelPipeline( # TTS (one of the following vocies)
|
||||
[FunctionFilter(news_lady_filter), news_lady], # News Lady voice
|
||||
[FunctionFilter(british_lady_filter), british_lady], # British Lady voice
|
||||
[FunctionFilter(barbershop_man_filter), barbershop_man], # Barbershop Man voice
|
||||
),
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
ParallelPipeline( # TTS (one of the following vocies)
|
||||
[FunctionFilter(news_lady_filter), news_lady], # News Lady voice
|
||||
[FunctionFilter(british_lady_filter), british_lady], # British Lady voice
|
||||
[FunctionFilter(barbershop_man_filter), barbershop_man], # Barbershop Man voice
|
||||
),
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@@ -142,7 +146,9 @@ async def main(room_url: str, token):
|
||||
messages.append(
|
||||
{
|
||||
"role": "system",
|
||||
"content": f"Please introduce yourself to the user and let them know the voices you can do. Your initial responses should be as if you were a {current_voice}."})
|
||||
"content": f"Please introduce yourself to the user and let them know the voices you can do. Your initial responses should be as if you were a {current_voice}.",
|
||||
}
|
||||
)
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -151,5 +157,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -14,13 +14,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantContextAggregator,
|
||||
LLMUserContextAggregator
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.filters.function_filter import FunctionFilter
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.whisper import Model, WhisperSTTService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
@@ -33,6 +29,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -41,10 +38,10 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
current_language = "English"
|
||||
|
||||
|
||||
async def switch_language(llm, args):
|
||||
async def switch_language(function_name, tool_call_id, args, llm, context, result_callback):
|
||||
global current_language
|
||||
current_language = args["language"]
|
||||
return {"voice": f"Your answers from now on should be in {current_language}."}
|
||||
await result_callback({"voice": f"Your answers from now on should be in {current_language}."})
|
||||
|
||||
|
||||
async def english_filter(frame) -> bool:
|
||||
@@ -55,8 +52,10 @@ async def spanish_filter(frame) -> bool:
|
||||
return current_language == "Spanish"
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -66,28 +65,23 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True
|
||||
)
|
||||
vad_audio_passthrough=True,
|
||||
),
|
||||
)
|
||||
|
||||
stt = WhisperSTTService(model=Model.LARGE)
|
||||
|
||||
english_tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id="pNInz6obpgDQGcFmaJgB",
|
||||
english_tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
spanish_tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
model="eleven_multilingual_v2",
|
||||
voice_id="9F4C8ztpNUmXkdDDbz3J",
|
||||
spanish_tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="846d6cb0-2301-48b6-9683-48f5618ea2f6", # Spanish-speaking Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
llm.register_function("switch_language", switch_language)
|
||||
|
||||
tools = [
|
||||
@@ -106,7 +100,9 @@ async def main(room_url: str, token):
|
||||
},
|
||||
"required": ["language"],
|
||||
},
|
||||
})]
|
||||
},
|
||||
)
|
||||
]
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -115,21 +111,22 @@ async def main(room_url: str, token):
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
tma_in = LLMUserContextAggregator(context)
|
||||
tma_out = LLMAssistantContextAggregator(context)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
ParallelPipeline( # TTS (bot will speak the chosen language)
|
||||
[FunctionFilter(english_filter), english_tts], # English
|
||||
[FunctionFilter(spanish_filter), spanish_tts], # Spanish
|
||||
),
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
ParallelPipeline( # TTS (bot will speak the chosen language)
|
||||
[FunctionFilter(english_filter), english_tts], # English
|
||||
[FunctionFilter(spanish_filter), spanish_tts], # Spanish
|
||||
),
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
|
||||
@@ -140,7 +137,9 @@ async def main(room_url: str, token):
|
||||
messages.append(
|
||||
{
|
||||
"role": "system",
|
||||
"content": f"Please introduce yourself to the user and let them know the languages you speak. Your initial responses should be in {current_language}."})
|
||||
"content": f"Please introduce yourself to the user and let them know the languages you speak. Your initial responses should be in {current_language}.",
|
||||
}
|
||||
)
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -149,5 +148,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -8,17 +8,22 @@ import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.deepgram import DeepgramTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport, DailyTransportMessageFrame
|
||||
from pipecat.transports.services.daily import (
|
||||
DailyParams,
|
||||
DailyTransport,
|
||||
DailyTransportMessageFrame,
|
||||
)
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
@@ -26,14 +31,17 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -42,15 +50,15 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = DeepgramTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("DEEPGRAM_API_KEY"),
|
||||
voice="aura-asteria-en",
|
||||
base_url="http://0.0.0.0:8080/v1/speak"
|
||||
base_url="http://0.0.0.0:8080/v1/speak",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
@@ -59,7 +67,7 @@ async def main(room_url: str, token):
|
||||
# model="gpt-4o"
|
||||
# Or, to use a local vLLM (or similar) api server
|
||||
model="meta-llama/Meta-Llama-3-8B-Instruct",
|
||||
base_url="http://0.0.0.0:8000/v1"
|
||||
base_url="http://0.0.0.0:8000/v1",
|
||||
)
|
||||
|
||||
messages = [
|
||||
@@ -72,14 +80,16 @@ async def main(room_url: str, token):
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))
|
||||
|
||||
@@ -92,8 +102,7 @@ async def main(room_url: str, token):
|
||||
# When the first participant joins, the bot should introduce itself.
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
# Handle "latency-ping" messages. The client will send app messages that look like
|
||||
@@ -110,14 +119,18 @@ async def main(room_url: str, token):
|
||||
logger.debug(f"Received latency ping app message: {message}")
|
||||
ts = message["latency-ping"]["ts"]
|
||||
# Send immediately
|
||||
transport.output().send_message(DailyTransportMessageFrame(
|
||||
message={"latency-pong-msg-handler": {"ts": ts}},
|
||||
participant_id=sender))
|
||||
transport.output().send_message(
|
||||
DailyTransportMessageFrame(
|
||||
message={"latency-pong-msg-handler": {"ts": ts}}, participant_id=sender
|
||||
)
|
||||
)
|
||||
# And push to the pipeline for the Daily transport.output to send
|
||||
await tma_in.push_frame(
|
||||
DailyTransportMessageFrame(
|
||||
message={"latency-pong-pipeline-delivery": {"ts": ts}},
|
||||
participant_id=sender))
|
||||
participant_id=sender,
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug(f"message handling error: {e} - {message}")
|
||||
|
||||
@@ -126,5 +139,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -14,10 +14,11 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
from pipecat.processors.frame_processor import FrameDirection
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.user_idle_processor import UserIdleProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -27,14 +28,17 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -43,19 +47,16 @@ async def main(room_url: str, token):
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -69,33 +70,41 @@ async def main(room_url: str, token):
|
||||
|
||||
async def user_idle_callback(user_idle: UserIdleProcessor):
|
||||
messages.append(
|
||||
{"role": "system", "content": "Ask the user if they are still there and try to prompt for some input, but be short."})
|
||||
await user_idle.queue_frame(LLMMessagesFrame(messages))
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Ask the user if they are still there and try to prompt for some input, but be short.",
|
||||
}
|
||||
)
|
||||
await user_idle.push_frame(LLMMessagesFrame(messages))
|
||||
|
||||
user_idle = UserIdleProcessor(callback=user_idle_callback, timeout=5.0)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
user_idle, # Idle user check-in
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
user_idle, # Idle user check-in
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
))
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -104,5 +113,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
76
examples/foundational/18-gstreamer-filesrc.py
Normal file
76
examples/foundational/18-gstreamer-filesrc.py
Normal file
@@ -0,0 +1,76 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.gstreamer.pipeline_source import GStreamerPipelineSource
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure_with_args
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
    """Stream a local media file into a Daily room through GStreamer.

    Parses a required ``-i/--input`` argument, lets ``configure_with_args``
    resolve the room URL, then runs a two-stage pipeline: a GStreamer
    ``filesrc`` source feeding the Daily transport output.
    """
    async with aiohttp.ClientSession() as session:
        parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
        parser.add_argument("-i", "--input", type=str, required=True, help="Input video file")

        # The token returned by the helper is not used; the transport joins
        # with `None` as its token argument.
        (room_url, _, args) = await configure_with_args(session, parser)

        transport = DailyTransport(
            room_url,
            None,
            "GStreamer",
            DailyParams(
                audio_out_enabled=True,
                audio_out_is_live=True,
                camera_out_enabled=True,
                camera_out_width=1280,
                camera_out_height=720,
                camera_out_is_live=True,
            ),
        )

        # NOTE(review): the path is interpolated unquoted into the pipeline
        # description; paths containing spaces presumably need quoting -- confirm.
        gst = GStreamerPipelineSource(
            pipeline=f"filesrc location={args.input}",
            out_params=GStreamerPipelineSource.OutputParams(
                video_width=1280,
                video_height=720,
                audio_sample_rate=16000,
                audio_channels=1,
            ),
        )

        pipeline = Pipeline(
            [
                gst,  # GStreamer file source
                transport.output(),  # Transport bot output
            ]
        )

        task = PipelineTask(pipeline)

        runner = PipelineRunner()

        await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
67
examples/foundational/18a-gstreamer-videotestsrc.py
Normal file
67
examples/foundational/18a-gstreamer-videotestsrc.py
Normal file
@@ -0,0 +1,67 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import sys
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.gstreamer.pipeline_source import GStreamerPipelineSource
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
    """Send a GStreamer ``videotestsrc`` stream to a Daily room.

    The room URL comes from ``configure``; the pipeline has two stages, the
    GStreamer source and the Daily transport output. Only camera output is
    enabled on the transport.
    """
    async with aiohttp.ClientSession() as session:
        # The token returned by the helper is not used; the transport joins
        # with `None` as its token argument.
        (room_url, _) = await configure(session)

        transport = DailyTransport(
            room_url,
            None,
            "GStreamer",
            DailyParams(
                camera_out_enabled=True,
                camera_out_width=1280,
                camera_out_height=720,
                camera_out_is_live=True,
            ),
        )

        gst = GStreamerPipelineSource(
            pipeline='videotestsrc ! capsfilter caps="video/x-raw,width=1280,height=720,framerate=30/1"',
            out_params=GStreamerPipelineSource.OutputParams(
                video_width=1280, video_height=720, clock_sync=False
            ),
        )

        pipeline = Pipeline(
            [
                gst,  # GStreamer videotestsrc source
                transport.output(),  # Transport bot output
            ]
        )

        task = PipelineTask(pipeline)

        runner = PipelineRunner()

        await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
118
examples/foundational/19a-tools-anthropic.py
Normal file
118
examples/foundational/19a-tools-anthropic.py
Normal file
@@ -0,0 +1,118 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def get_weather(function_name, tool_call_id, arguments, llm, context, result_callback):
    """Tool handler that answers a weather query with a canned forecast.

    Args:
        function_name: Unused here.
        tool_call_id: Unused here.
        arguments: Mapping holding the tool arguments; must contain "location".
        llm: Unused here.
        context: Unused here.
        result_callback: Awaitable callable that receives the result string.

    Raises:
        KeyError: If ``arguments`` has no "location" entry.
    """
    location = arguments["location"]
    await result_callback(f"The weather in {location} is currently 72 degrees and sunny.")
|
||||
|
||||
|
||||
async def main():
    """Run a Daily voice bot backed by Anthropic with a ``get_weather`` tool.

    Builds the transport, Cartesia TTS and Anthropic LLM services, registers
    the ``get_weather`` handler, and runs the pipeline. The conversation is
    kicked off when the first participant joins.
    """
    async with aiohttp.ClientSession() as session:
        (room_url, token) = await configure(session)

        transport = DailyTransport(
            room_url,
            token,
            "Respond bot",
            DailyParams(
                audio_out_enabled=True,
                transcription_enabled=True,
                vad_enabled=True,
                vad_analyzer=SileroVADAnalyzer(),
            ),
        )

        tts = CartesiaTTSService(
            api_key=os.getenv("CARTESIA_API_KEY"),
            voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22",  # British Lady
        )

        llm = AnthropicLLMService(
            api_key=os.getenv("ANTHROPIC_API_KEY"), model="claude-3-5-sonnet-20240620"
        )
        llm.register_function("get_weather", get_weather)

        # Tool description handed to the LLM context; the name matches the
        # handler registered above.
        tools = [
            {
                "name": "get_weather",
                "description": "Get the current weather in a given location",
                "input_schema": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        }
                    },
                    "required": ["location"],
                },
            }
        ]

        # todo: test with very short initial user message

        # messages = [{"role": "system",
        #              "content": "You are a helpful assistant who can report the weather in any location in the universe. Respond concisely. Your response will be turned into speech so use only simple words and punctuation."},
        #             {"role": "user",
        #              "content": " Start the conversation by introducing yourself."}]

        messages = [{"role": "user", "content": "Say 'hello' to start the conversation."}]

        context = OpenAILLMContext(messages, tools)
        context_aggregator = llm.create_context_aggregator(context)

        pipeline = Pipeline(
            [
                transport.input(),  # Transport user input
                context_aggregator.user(),  # User spoken responses
                llm,  # LLM
                tts,  # TTS
                transport.output(),  # Transport bot output
                context_aggregator.assistant(),  # Assistant spoken responses and tool context
            ]
        )

        task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            transport.capture_participant_transcription(participant["id"])
            # Kick off the conversation.
            await task.queue_frames([context_aggregator.user().get_context_frame()])

        runner = PipelineRunner()

        await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
173
examples/foundational/19b-tools-video-anthropic.py
Normal file
173
examples/foundational/19b-tools-video-anthropic.py
Normal file
@@ -0,0 +1,173 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
video_participant_id = None
|
||||
|
||||
|
||||
async def get_weather(function_name, tool_call_id, arguments, llm, context, result_callback):
    """Tool handler that answers a weather query with a canned forecast.

    Args:
        function_name: Unused here.
        tool_call_id: Unused here.
        arguments: Mapping holding the tool arguments; must contain "location".
        llm: Unused here.
        context: Unused here.
        result_callback: Awaitable callable that receives the result string.

    Raises:
        KeyError: If ``arguments`` has no "location" entry.
    """
    location = arguments["location"]
    await result_callback(f"The weather in {location} is currently 72 degrees and sunny.")
|
||||
|
||||
|
||||
async def get_image(function_name, tool_call_id, arguments, llm, context, result_callback):
    """Tool handler that asks the LLM service for a frame of the user's video.

    Reads the module-level ``video_participant_id`` at call time and forwards
    it, together with the user's question, to ``llm.request_image_frame``.

    Args:
        function_name: Unused here.
        tool_call_id: Unused here.
        arguments: Mapping holding the tool arguments; must contain "question".
        llm: Service object exposing an awaitable ``request_image_frame``.
        context: Unused here.
        result_callback: Not invoked by this handler.

    Raises:
        KeyError: If ``arguments`` has no "question" entry.
    """
    question = arguments["question"]
    # NOTE(review): no result is reported through result_callback; presumably
    # the requested image frame drives the reply -- confirm against the service.
    await llm.request_image_frame(user_id=video_participant_id, text_content=question)
|
||||
|
||||
|
||||
async def main():
    """Run a Daily voice bot backed by Anthropic with weather and video tools.

    Builds the transport, Cartesia TTS and Anthropic LLM services, registers
    the ``get_weather`` and ``get_image`` handlers, and runs the pipeline.
    When the first participant joins, their id is stored in the module-level
    ``video_participant_id`` and the conversation is kicked off.
    """
    # Kept from the original: `llm` is bound at module level. get_image
    # receives the service through its own `llm` argument.
    global llm

    async with aiohttp.ClientSession() as session:
        (room_url, token) = await configure(session)

        transport = DailyTransport(
            room_url,
            token,
            "Respond bot",
            DailyParams(
                audio_out_enabled=True,
                transcription_enabled=True,
                vad_enabled=True,
                vad_analyzer=SileroVADAnalyzer(),
            ),
        )

        tts = CartesiaTTSService(
            api_key=os.getenv("CARTESIA_API_KEY"),
            voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22",  # British Lady
        )

        llm = AnthropicLLMService(
            api_key=os.getenv("ANTHROPIC_API_KEY"),
            model="claude-3-5-sonnet-20240620",
            enable_prompt_caching_beta=True,
        )
        llm.register_function("get_weather", get_weather)
        llm.register_function("get_image", get_image)

        # Tool descriptions handed to the LLM context; the names match the
        # handlers registered above.
        tools = [
            {
                "name": "get_weather",
                "description": "Get the current weather in a given location",
                "input_schema": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        }
                    },
                    "required": ["location"],
                },
            },
            {
                "name": "get_image",
                "description": "Get an image from the video stream.",
                "input_schema": {
                    "type": "object",
                    "properties": {
                        "question": {
                            "type": "string",
                            "description": "The question that the user is asking about the image.",
                        }
                    },
                    "required": ["question"],
                },
            },
        ]

        # todo: test with very short initial user message

        system_prompt = """\
You are a helpful assistant who converses with a user and answers questions. Respond concisely to general questions.

Your response will be turned into speech so use only simple words and punctuation.

You have access to two tools: get_weather and get_image.

You can respond to questions about the weather using the get_weather tool.

You can answer questions about the user's video stream using the get_image tool. Some examples of phrases that \
indicate you should use the get_image tool are:
- What do you see?
- What's in the video?
- Can you describe the video?
- Tell me about what you see.
- Tell me something interesting about what you see.
- What's happening in the video?

If you need to use a tool, simply use the tool. Do not tell the user the tool you are using. Be brief and concise.
"""

        messages = [
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": system_prompt,
                    }
                ],
            },
            {"role": "user", "content": "Start the conversation by introducing yourself."},
        ]

        context = OpenAILLMContext(messages, tools)
        context_aggregator = llm.create_context_aggregator(context)

        pipeline = Pipeline(
            [
                transport.input(),  # Transport user input
                context_aggregator.user(),  # User speech to text
                llm,  # LLM
                tts,  # TTS
                transport.output(),  # Transport bot output
                context_aggregator.assistant(),  # Assistant spoken responses and tool context
            ]
        )

        task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            # Remember who joined so get_image can request frames from them.
            global video_participant_id
            video_participant_id = participant["id"]
            transport.capture_participant_transcription(video_participant_id)
            transport.capture_participant_video(video_participant_id, framerate=0)
            # Kick off the conversation.
            await task.queue_frames([context_aggregator.user().get_context_frame()])

        runner = PipelineRunner()
        await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
137
examples/foundational/19c-tools-togetherai.py
Normal file
137
examples/foundational/19c-tools-togetherai.py
Normal file
@@ -0,0 +1,137 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.together import TogetherLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def get_current_weather(
    function_name, tool_call_id, arguments, llm, context, result_callback
):
    """Tool handler that answers a weather query with a canned forecast.

    Logs a debug line on entry, then reports a fixed forecast for the
    requested location through ``result_callback``.

    Args:
        function_name: Unused here.
        tool_call_id: Unused here.
        arguments: Mapping holding the tool arguments; must contain "location".
        llm: Unused here.
        context: Unused here.
        result_callback: Awaitable callable that receives the result string.

    Raises:
        KeyError: If ``arguments`` has no "location" entry.
    """
    logger.debug("IN get_current_weather")
    location = arguments["location"]
    await result_callback(f"The weather in {location} is currently 72 degrees and sunny.")
|
||||
|
||||
|
||||
async def main():
    """Run a Daily voice bot backed by Together AI with a prompt-described tool.

    Builds the transport, Cartesia TTS and Together LLM services and registers
    the ``get_current_weather`` handler. The tool is described to the model
    inside the system prompt (the context is created without a tools list).
    The conversation is kicked off when the first participant joins.
    """
    async with aiohttp.ClientSession() as session:
        (room_url, token) = await configure(session)

        transport = DailyTransport(
            room_url,
            token,
            "Respond bot",
            DailyParams(
                audio_out_enabled=True,
                transcription_enabled=True,
                vad_enabled=True,
                vad_analyzer=SileroVADAnalyzer(),
            ),
        )

        tts = CartesiaTTSService(
            api_key=os.getenv("CARTESIA_API_KEY"),
            voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22",  # British Lady
        )

        llm = TogetherLLMService(
            api_key=os.getenv("TOGETHER_API_KEY"),
            model=os.getenv("TOGETHER_MODEL"),
        )
        llm.register_function("get_current_weather", get_current_weather)

        # Serialized into the system prompt below; the name matches the
        # handler registered above.
        weatherTool = {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                },
                "required": ["location"],
            },
        }

        system_prompt = f"""\
You have access to the following functions:

Use the function '{weatherTool["name"]}' to '{weatherTool["description"]}':
{json.dumps(weatherTool)}

If you choose to call a function ONLY reply in the following format with no prefix or suffix:

<function=example_function_name>{{\"example_name\": \"example_value\"}}</function>

Reminder:
- Function calls MUST follow the specified format, start with <function= and end with </function>
- Required parameters MUST be specified
- Only call one function at a time
- Put the entire function call reply on one line
- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls

"""

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": "Wait for the user to say something."},
        ]

        context = OpenAILLMContext(messages)
        context_aggregator = llm.create_context_aggregator(context)

        pipeline = Pipeline(
            [
                transport.input(),  # Transport user input
                context_aggregator.user(),  # User speech to text
                llm,  # LLM
                tts,  # TTS
                transport.output(),  # Transport bot output
                context_aggregator.assistant(),  # Assistant spoken responses and tool context
            ]
        )

        task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            transport.capture_participant_transcription(participant["id"])
            # Kick off the conversation.
            await task.queue_frames([LLMMessagesFrame(messages)])

        runner = PipelineRunner()

        await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -1,18 +1,29 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
|
||||
|
||||
|
||||
def configure():
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
async def configure(aiohttp_session: aiohttp.ClientSession):
|
||||
(url, token, _) = await configure_with_args(aiohttp_session)
|
||||
return (url, token)
|
||||
|
||||
|
||||
async def configure_with_args(
|
||||
aiohttp_session: aiohttp.ClientSession, parser: argparse.ArgumentParser | None = None
|
||||
):
|
||||
if not parser:
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
parser.add_argument(
|
||||
"-u",
|
||||
"--url",
|
||||
type=str,
|
||||
required=False,
|
||||
help="URL of the Daily room to join")
|
||||
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-k",
|
||||
"--apikey",
|
||||
@@ -28,31 +39,24 @@ def configure():
|
||||
|
||||
if not url:
|
||||
raise Exception(
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL.")
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
|
||||
)
|
||||
|
||||
if not key:
|
||||
raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
raise Exception(
|
||||
"No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
|
||||
)
|
||||
|
||||
daily_rest_helper = DailyRESTHelper(
|
||||
daily_api_key=key,
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
|
||||
# Create a meeting token for the given room with an expiration 1 hour in
|
||||
# the future.
|
||||
room_name: str = urllib.parse.urlparse(url).path[1:]
|
||||
expiration: float = time.time() + 60 * 60
|
||||
expiry_time: float = 60 * 60
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://api.daily.co/v1/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True,
|
||||
"exp": expiration}},
|
||||
)
|
||||
token = await daily_rest_helper.get_token(url, expiry_time)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
|
||||
return (url, token)
|
||||
return (url, token, args)
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
@@ -7,10 +13,11 @@ from PIL import Image
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
ImageRawFrame,
|
||||
OutputImageRawFrame,
|
||||
SpriteFrame,
|
||||
Frame,
|
||||
LLMMessagesFrame,
|
||||
AudioRawFrame,
|
||||
TTSAudioRawFrame,
|
||||
TTSStoppedFrame,
|
||||
TextFrame,
|
||||
UserImageRawFrame,
|
||||
@@ -25,7 +32,7 @@ from pipecat.processors.aggregators.llm_response import LLMUserResponseAggregato
|
||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.moondream import MoondreamService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
@@ -36,6 +43,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -53,7 +61,7 @@ for i in range(1, 26):
|
||||
# Get the filename without the extension to use as the dictionary key
|
||||
# Open the image and convert it to bytes
|
||||
with Image.open(full_path) as img:
|
||||
sprites.append(ImageRawFrame(image=img.tobytes(), size=img.size, format=img.format))
|
||||
sprites.append(OutputImageRawFrame(image=img.tobytes(), size=img.size, format=img.format))
|
||||
|
||||
flipped = sprites[::-1]
|
||||
sprites.extend(flipped)
|
||||
@@ -76,7 +84,7 @@ class TalkingAnimation(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, AudioRawFrame):
|
||||
if isinstance(frame, TTSAudioRawFrame):
|
||||
if not self._is_talking:
|
||||
await self.push_frame(talking_frame)
|
||||
self._is_talking = True
|
||||
@@ -99,7 +107,9 @@ class UserImageRequester(FrameProcessor):
|
||||
|
||||
if self.participant_id and isinstance(frame, TextFrame):
|
||||
if frame.text == user_request_answer:
|
||||
await self.push_frame(UserImageRequestFrame(self.participant_id), FrameDirection.UPSTREAM)
|
||||
await self.push_frame(
|
||||
UserImageRequestFrame(self.participant_id), FrameDirection.UPSTREAM
|
||||
)
|
||||
await self.push_frame(TextFrame("Describe the image in a short sentence."))
|
||||
elif isinstance(frame, UserImageRawFrame):
|
||||
await self.push_frame(frame)
|
||||
@@ -128,8 +138,10 @@ class ImageFilterProcessor(FrameProcessor):
|
||||
await self.push_frame(frame)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -141,19 +153,16 @@ async def main(room_url: str, token):
|
||||
camera_out_height=576,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id="pNInz6obpgDQGcFmaJgB",
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
ta = TalkingAnimation()
|
||||
|
||||
@@ -176,17 +185,17 @@ async def main(room_url: str, token):
|
||||
|
||||
ura = LLMUserResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(),
|
||||
ura,
|
||||
llm,
|
||||
ParallelPipeline(
|
||||
[sa, ir, va, moondream],
|
||||
[tf, imgf]),
|
||||
tts,
|
||||
ta,
|
||||
transport.output()
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
ura,
|
||||
llm,
|
||||
ParallelPipeline([sa, ir, va, moondream], [tf, imgf]),
|
||||
tts,
|
||||
ta,
|
||||
transport.output(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
await task.queue_frame(quiet_frame)
|
||||
@@ -204,5 +213,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
python-dotenv
|
||||
requests
|
||||
fastapi[all]
|
||||
uvicorn
|
||||
pipecat-ai[daily,moondream,openai,silero]
|
||||
pipecat-ai[daily,cartesia,moondream,openai,silero]
|
||||
|
||||
@@ -1,18 +1,21 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
|
||||
|
||||
|
||||
def configure():
|
||||
async def configure(aiohttp_session: aiohttp.ClientSession):
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
parser.add_argument(
|
||||
"-u",
|
||||
"--url",
|
||||
type=str,
|
||||
required=False,
|
||||
help="URL of the Daily room to join")
|
||||
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-k",
|
||||
"--apikey",
|
||||
@@ -28,31 +31,24 @@ def configure():
|
||||
|
||||
if not url:
|
||||
raise Exception(
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL.")
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
|
||||
)
|
||||
|
||||
if not key:
|
||||
raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
raise Exception(
|
||||
"No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
|
||||
)
|
||||
|
||||
daily_rest_helper = DailyRESTHelper(
|
||||
daily_api_key=key,
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
|
||||
# Create a meeting token for the given room with an expiration 1 hour in
|
||||
# the future.
|
||||
room_name: str = urllib.parse.urlparse(url).path[1:]
|
||||
expiration: float = time.time() + 60 * 60
|
||||
expiry_time: float = 60 * 60
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://api.daily.co/v1/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True,
|
||||
"exp": expiration}},
|
||||
)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
token = await daily_rest_helper.get_token(url, expiry_time)
|
||||
|
||||
return (url, token)
|
||||
|
||||
@@ -1,31 +1,52 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
import atexit
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse, RedirectResponse
|
||||
|
||||
from utils.daily_helpers import create_room as _create_room, get_token
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
|
||||
|
||||
MAX_BOTS_PER_ROOM = 1
|
||||
|
||||
# Bot sub-process dict for status reporting and concurrency control
|
||||
bot_procs = {}
|
||||
|
||||
daily_helpers = {}
|
||||
|
||||
|
||||
def cleanup():
|
||||
# Clean up function, just to be extra safe
|
||||
for proc in bot_procs.values():
|
||||
for entry in bot_procs.values():
|
||||
proc = entry[0]
|
||||
proc.terminate()
|
||||
proc.wait()
|
||||
|
||||
|
||||
atexit.register(cleanup)
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
aiohttp_session = aiohttp.ClientSession()
|
||||
daily_helpers["rest"] = DailyRESTHelper(
|
||||
daily_api_key=os.getenv("DAILY_API_KEY", ""),
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
yield
|
||||
await aiohttp_session.close()
|
||||
cleanup()
|
||||
|
||||
|
||||
app = FastAPI()
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
@@ -39,45 +60,42 @@ app.add_middleware(
|
||||
@app.get("/start")
|
||||
async def start_agent(request: Request):
|
||||
print(f"!!! Creating room")
|
||||
room_url, room_name = _create_room()
|
||||
print(f"!!! Room URL: {room_url}")
|
||||
room = await daily_helpers["rest"].create_room(DailyRoomParams())
|
||||
print(f"!!! Room URL: {room.url}")
|
||||
# Ensure the room property is present
|
||||
if not room_url:
|
||||
if not room.url:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Missing 'room' property in request data. Cannot start agent without a target room!")
|
||||
detail="Missing 'room' property in request data. Cannot start agent without a target room!",
|
||||
)
|
||||
|
||||
# Check if there is already an existing process running in this room
|
||||
num_bots_in_room = sum(
|
||||
1 for proc in bot_procs.values() if proc[1] == room_url and proc[0].poll() is None)
|
||||
1 for proc in bot_procs.values() if proc[1] == room.url and proc[0].poll() is None
|
||||
)
|
||||
if num_bots_in_room >= MAX_BOTS_PER_ROOM:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Max bot limited reach for room: {room_url}")
|
||||
raise HTTPException(status_code=500, detail=f"Max bot limited reach for room: {room.url}")
|
||||
|
||||
# Get the token for the room
|
||||
token = get_token(room_url)
|
||||
token = await daily_helpers["rest"].get_token(room.url)
|
||||
|
||||
if not token:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to get token for room: {room_url}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")
|
||||
|
||||
# Spawn a new agent, and join the user session
|
||||
# Note: this is mostly for demonstration purposes (refer to 'deployment' in README)
|
||||
try:
|
||||
proc = subprocess.Popen(
|
||||
[
|
||||
f"python3 -m bot -u {room_url} -t {token}"
|
||||
],
|
||||
[f"python3 -m bot -u {room.url} -t {token}"],
|
||||
shell=True,
|
||||
bufsize=1,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__))
|
||||
cwd=os.path.dirname(os.path.abspath(__file__)),
|
||||
)
|
||||
bot_procs[proc.pid] = (proc, room_url)
|
||||
bot_procs[proc.pid] = (proc, room.url)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
|
||||
return RedirectResponse(room_url)
|
||||
return RedirectResponse(room.url)
|
||||
|
||||
|
||||
@app.get("/status/{pid}")
|
||||
@@ -87,8 +105,7 @@ def get_status(pid: int):
|
||||
|
||||
# If the subprocess doesn't exist, return an error
|
||||
if not proc:
|
||||
raise HTTPException(
|
||||
status_code=404, detail=f"Bot with process id: {pid} not found")
|
||||
raise HTTPException(status_code=404, detail=f"Bot with process id: {pid} not found")
|
||||
|
||||
# Check the status of the subprocess
|
||||
if proc[0].poll() is None:
|
||||
@@ -105,14 +122,10 @@ if __name__ == "__main__":
|
||||
default_host = os.getenv("HOST", "0.0.0.0")
|
||||
default_port = int(os.getenv("FAST_API_PORT", "7860"))
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Daily Moondream FastAPI server")
|
||||
parser.add_argument("--host", type=str,
|
||||
default=default_host, help="Host address")
|
||||
parser.add_argument("--port", type=int,
|
||||
default=default_port, help="Port number")
|
||||
parser.add_argument("--reload", action="store_true",
|
||||
help="Reload code on change")
|
||||
parser = argparse.ArgumentParser(description="Daily Moondream FastAPI server")
|
||||
parser.add_argument("--host", type=str, default=default_host, help="Host address")
|
||||
parser.add_argument("--port", type=int, default=default_port, help="Port number")
|
||||
parser.add_argument("--reload", action="store_true", help="Reload code on change")
|
||||
|
||||
config = parser.parse_args()
|
||||
|
||||
|
||||
@@ -1,109 +0,0 @@
|
||||
|
||||
import urllib.parse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
|
||||
daily_api_path = os.getenv("DAILY_API_URL") or "api.daily.co/v1"
|
||||
daily_api_key = os.getenv("DAILY_API_KEY")
|
||||
|
||||
|
||||
def create_room() -> tuple[str, str]:
|
||||
"""
|
||||
Helper function to create a Daily room.
|
||||
# See: https://docs.daily.co/reference/rest-api/rooms
|
||||
|
||||
Returns:
|
||||
tuple: A tuple containing the room URL and room name.
|
||||
|
||||
Raises:
|
||||
Exception: If the request to create the room fails or if the response does not contain the room URL or room name.
|
||||
"""
|
||||
room_props = {
|
||||
"exp": time.time() + 60 * 60, # 1 hour
|
||||
"enable_chat": True,
|
||||
"enable_emoji_reactions": True,
|
||||
"eject_at_room_exp": True,
|
||||
"enable_prejoin_ui": False, # Important for the bot to be able to join headlessly
|
||||
}
|
||||
res = requests.post(
|
||||
f"https://{daily_api_path}/rooms",
|
||||
headers={"Authorization": f"Bearer {daily_api_key}"},
|
||||
json={
|
||||
"properties": room_props
|
||||
},
|
||||
)
|
||||
if res.status_code != 200:
|
||||
raise Exception(f"Unable to create room: {res.text}")
|
||||
|
||||
data = res.json()
|
||||
room_url: str = data.get("url")
|
||||
room_name: str = data.get("name")
|
||||
if room_url is None or room_name is None:
|
||||
raise Exception("Missing room URL or room name in response")
|
||||
|
||||
return room_url, room_name
|
||||
|
||||
|
||||
def get_name_from_url(room_url: str) -> str:
|
||||
"""
|
||||
Extracts the name from a given room URL.
|
||||
|
||||
Args:
|
||||
room_url (str): The URL of the room.
|
||||
|
||||
Returns:
|
||||
str: The extracted name from the room URL.
|
||||
"""
|
||||
return urllib.parse.urlparse(room_url).path[1:]
|
||||
|
||||
|
||||
def get_token(room_url: str) -> str:
|
||||
"""
|
||||
Retrieves a meeting token for the specified Daily room URL.
|
||||
# See: https://docs.daily.co/reference/rest-api/meeting-tokens
|
||||
|
||||
Args:
|
||||
room_url (str): The URL of the Daily room.
|
||||
|
||||
Returns:
|
||||
str: The meeting token.
|
||||
|
||||
Raises:
|
||||
Exception: If no room URL is specified or if no Daily API key is specified.
|
||||
Exception: If there is an error creating the meeting token.
|
||||
"""
|
||||
if not room_url:
|
||||
raise Exception(
|
||||
"No Daily room specified. You must specify a Daily room in order a token to be generated.")
|
||||
|
||||
if not daily_api_key:
|
||||
raise Exception(
|
||||
"No Daily API key specified. set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
|
||||
expiration: float = time.time() + 60 * 60
|
||||
room_name = get_name_from_url(room_url)
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://{daily_api_path}/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {daily_api_key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True, # Owner tokens required for transcription
|
||||
"exp": expiration}},
|
||||
)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
|
||||
return token
|
||||
@@ -10,24 +10,14 @@ import os
|
||||
import sys
|
||||
import wave
|
||||
|
||||
from typing import List
|
||||
|
||||
from openai._types import NotGiven, NOT_GIVEN
|
||||
|
||||
from openai.types.chat import (
|
||||
ChatCompletionToolParam,
|
||||
)
|
||||
|
||||
from pipecat.frames.frames import AudioRawFrame
|
||||
from pipecat.frames.frames import OutputAudioRawFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import LLMUserContextAggregator, LLMAssistantContextAggregator
|
||||
from pipecat.processors.logger import FrameLogger
|
||||
from pipecat.processors.frame_processor import FrameDirection
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMContext, OpenAILLMContextFrame, OpenAILLMService
|
||||
from pipecat.services.ai_services import AIService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
@@ -36,6 +26,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -59,52 +50,46 @@ for file in sound_files:
|
||||
filename = os.path.splitext(os.path.basename(full_path))[0]
|
||||
# Open the sound and convert it to bytes
|
||||
with wave.open(full_path) as audio_file:
|
||||
sounds[file] = AudioRawFrame(audio_file.readframes(-1),
|
||||
audio_file.getframerate(), audio_file.getnchannels())
|
||||
sounds[file] = OutputAudioRawFrame(
|
||||
audio_file.readframes(-1), audio_file.getframerate(), audio_file.getnchannels()
|
||||
)
|
||||
|
||||
|
||||
class IntakeProcessor:
|
||||
def __init__(
|
||||
self,
|
||||
context: OpenAILLMContext,
|
||||
llm: AIService,
|
||||
tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
|
||||
*args,
|
||||
**kwargs,
|
||||
):
|
||||
super().__init__(*args, **kwargs)
|
||||
self._context: OpenAILLMContext = context
|
||||
self._llm = llm
|
||||
def __init__(self, context: OpenAILLMContext):
|
||||
print(f"Initializing context from IntakeProcessor")
|
||||
self._context.add_message({"role": "system", "content": "You are Jessica, an agent for a company called Tri-County Health Services. Your job is to collect important information from the user before their doctor visit. You're talking to Chad Bailey. You should address the user by their first name and be polite and professional. You're not a medical professional, so you shouldn't provide any advice. Keep your responses short. Your job is to collect information to give to a doctor. Don't make assumptions about what values to plug into functions. Ask for clarification if a user response is ambiguous. Start by introducing yourself. Then, ask the user to confirm their identity by telling you their birthday, including the year. When they answer with their birthday, call the verify_birthday function."})
|
||||
self._context.set_tools([
|
||||
context.add_message(
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "verify_birthday",
|
||||
"description": "Use this function to verify the user has provided their correct birthday.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"birthday": {
|
||||
"type": "string",
|
||||
"description": "The user's birthdate, including the year. The user can provide it in any format, but convert it to YYYY-MM-DD format to call this function.",
|
||||
}},
|
||||
"role": "system",
|
||||
"content": "You are Jessica, an agent for a company called Tri-County Health Services. Your job is to collect important information from the user before their doctor visit. You're talking to Chad Bailey. You should address the user by their first name and be polite and professional. You're not a medical professional, so you shouldn't provide any advice. Keep your responses short. Your job is to collect information to give to a doctor. Don't make assumptions about what values to plug into functions. Ask for clarification if a user response is ambiguous. Start by introducing yourself. Then, ask the user to confirm their identity by telling you their birthday, including the year. When they answer with their birthday, call the verify_birthday function.",
|
||||
}
|
||||
)
|
||||
context.set_tools(
|
||||
[
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "verify_birthday",
|
||||
"description": "Use this function to verify the user has provided their correct birthday.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"birthday": {
|
||||
"type": "string",
|
||||
"description": "The user's birthdate, including the year. The user can provide it in any format, but convert it to YYYY-MM-DD format to call this function.",
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}])
|
||||
# Create an allowlist of functions that the LLM can call
|
||||
self._functions = [
|
||||
"verify_birthday",
|
||||
"list_prescriptions",
|
||||
"list_allergies",
|
||||
"list_conditions",
|
||||
"list_visit_reasons",
|
||||
]
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
async def verify_birthday(self, llm, args):
|
||||
async def verify_birthday(
|
||||
self, function_name, tool_call_id, args, llm, context, result_callback
|
||||
):
|
||||
if args["birthday"] == "1983-01-01":
|
||||
self._context.set_tools(
|
||||
context.set_tools(
|
||||
[
|
||||
{
|
||||
"type": "function",
|
||||
@@ -129,23 +114,40 @@ class IntakeProcessor:
|
||||
},
|
||||
},
|
||||
},
|
||||
}},
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
}])
|
||||
}
|
||||
]
|
||||
)
|
||||
# It's a bit weird to push this to the LLM, but it gets it into the pipeline
|
||||
await llm.push_frame(sounds["ding2.wav"], FrameDirection.DOWNSTREAM)
|
||||
# await llm.push_frame(sounds["ding2.wav"], FrameDirection.DOWNSTREAM)
|
||||
# We don't need the function call in the context, so just return a new
|
||||
# system message and let the framework re-prompt
|
||||
return [{"role": "system", "content": "Next, thank the user for confirming their identity, then ask the user to list their current prescriptions. Each prescription needs to have a medication name and a dosage. Do not call the list_prescriptions function with any unknown dosages."}]
|
||||
await result_callback(
|
||||
[
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Next, thank the user for confirming their identity, then ask the user to list their current prescriptions. Each prescription needs to have a medication name and a dosage. Do not call the list_prescriptions function with any unknown dosages.",
|
||||
}
|
||||
]
|
||||
)
|
||||
else:
|
||||
# The user provided an incorrect birthday; ask them to try again
|
||||
return [{"role": "system", "content": "The user provided an incorrect birthday. Ask them for their birthday again. When they answer, call the verify_birthday function."}]
|
||||
await result_callback(
|
||||
[
|
||||
{
|
||||
"role": "system",
|
||||
"content": "The user provided an incorrect birthday. Ask them for their birthday again. When they answer, call the verify_birthday function.",
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
async def start_prescriptions(self, llm):
|
||||
async def start_prescriptions(self, function_name, llm, context):
|
||||
print(f"!!! doing start prescriptions")
|
||||
# Move on to allergies
|
||||
self._context.set_tools(
|
||||
context.set_tools(
|
||||
[
|
||||
{
|
||||
"type": "function",
|
||||
@@ -163,24 +165,30 @@ class IntakeProcessor:
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "What the user is allergic to",
|
||||
}},
|
||||
}
|
||||
},
|
||||
},
|
||||
}},
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
}])
|
||||
self._context.add_message(
|
||||
}
|
||||
]
|
||||
)
|
||||
context.add_message(
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Next, ask the user if they have any allergies. Once they have listed their allergies or confirmed they don't have any, call the list_allergies function."})
|
||||
"content": "Next, ask the user if they have any allergies. Once they have listed their allergies or confirmed they don't have any, call the list_allergies function.",
|
||||
}
|
||||
)
|
||||
print(f"!!! about to await llm process frame in start prescrpitions")
|
||||
await llm.process_frame(OpenAILLMContextFrame(self._context), FrameDirection.DOWNSTREAM)
|
||||
await llm.process_frame(OpenAILLMContextFrame(context), FrameDirection.DOWNSTREAM)
|
||||
print(f"!!! past await process frame in start prescriptions")
|
||||
|
||||
async def start_allergies(self, llm):
|
||||
async def start_allergies(self, function_name, llm, context):
|
||||
print("!!! doing start allergies")
|
||||
# Move on to conditions
|
||||
self._context.set_tools(
|
||||
context.set_tools(
|
||||
[
|
||||
{
|
||||
"type": "function",
|
||||
@@ -198,23 +206,28 @@ class IntakeProcessor:
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The user's medical condition",
|
||||
}},
|
||||
}
|
||||
},
|
||||
},
|
||||
}},
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
])
|
||||
self._context.add_message(
|
||||
]
|
||||
)
|
||||
context.add_message(
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Now ask the user if they have any medical conditions the doctor should know about. Once they've answered the question, call the list_conditions function."})
|
||||
await llm.process_frame(OpenAILLMContextFrame(self._context), FrameDirection.DOWNSTREAM)
|
||||
"content": "Now ask the user if they have any medical conditions the doctor should know about. Once they've answered the question, call the list_conditions function.",
|
||||
}
|
||||
)
|
||||
await llm.process_frame(OpenAILLMContextFrame(context), FrameDirection.DOWNSTREAM)
|
||||
|
||||
async def start_conditions(self, llm):
|
||||
async def start_conditions(self, function_name, llm, context):
|
||||
print("!!! doing start conditions")
|
||||
# Move on to visit reasons
|
||||
self._context.set_tools(
|
||||
context.set_tools(
|
||||
[
|
||||
{
|
||||
"type": "function",
|
||||
@@ -232,42 +245,50 @@ class IntakeProcessor:
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The user's reason for visiting the doctor",
|
||||
}},
|
||||
}
|
||||
},
|
||||
},
|
||||
}},
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
}])
|
||||
self._context.add_message(
|
||||
{"role": "system", "content": "Finally, ask the user the reason for their doctor visit today. Once they answer, call the list_visit_reasons function."})
|
||||
await llm.process_frame(OpenAILLMContextFrame(self._context), FrameDirection.DOWNSTREAM)
|
||||
}
|
||||
]
|
||||
)
|
||||
context.add_message(
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Finally, ask the user the reason for their doctor visit today. Once they answer, call the list_visit_reasons function.",
|
||||
}
|
||||
)
|
||||
await llm.process_frame(OpenAILLMContextFrame(context), FrameDirection.DOWNSTREAM)
|
||||
|
||||
async def start_visit_reasons(self, llm):
|
||||
async def start_visit_reasons(self, function_name, llm, context):
|
||||
print("!!! doing start visit reasons")
|
||||
# move to finish call
|
||||
self._context.set_tools([])
|
||||
self._context.add_message({"role": "system",
|
||||
"content": "Now, thank the user and end the conversation."})
|
||||
await llm.process_frame(OpenAILLMContextFrame(self._context), FrameDirection.DOWNSTREAM)
|
||||
context.set_tools([])
|
||||
context.add_message(
|
||||
{"role": "system", "content": "Now, thank the user and end the conversation."}
|
||||
)
|
||||
await llm.process_frame(OpenAILLMContextFrame(context), FrameDirection.DOWNSTREAM)
|
||||
|
||||
async def save_data(self, llm, args):
|
||||
async def save_data(self, function_name, tool_call_id, args, llm, context, result_callback):
|
||||
logger.info(f"!!! Saving data: {args}")
|
||||
# Since this is supposed to be "async", returning None from the callback
|
||||
# will prevent adding anything to context or re-prompting
|
||||
return None
|
||||
await result_callback(None)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=576,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
@@ -279,63 +300,53 @@ async def main(room_url: str, token):
|
||||
# tier="nova",
|
||||
# model="2-general"
|
||||
# )
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
#
|
||||
# English
|
||||
#
|
||||
voice_id="pNInz6obpgDQGcFmaJgB",
|
||||
|
||||
#
|
||||
# Spanish
|
||||
#
|
||||
# model="eleven_multilingual_v2",
|
||||
# voice_id="gD1IexrzCvsXPHUuT0s3",
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
# tts = CartesiaTTSService(
|
||||
# api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
# voice_id="846d6cb0-2301-48b6-9683-48f5618ea2f6", # Spanish-speaking Lady
|
||||
# )
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = []
|
||||
context = OpenAILLMContext(messages=messages)
|
||||
user_context = LLMUserContextAggregator(context)
|
||||
assistant_context = LLMAssistantContextAggregator(context)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
intake = IntakeProcessor(context, llm)
|
||||
intake = IntakeProcessor(context)
|
||||
llm.register_function("verify_birthday", intake.verify_birthday)
|
||||
llm.register_function(
|
||||
"list_prescriptions",
|
||||
intake.save_data,
|
||||
start_callback=intake.start_prescriptions)
|
||||
"list_prescriptions", intake.save_data, start_callback=intake.start_prescriptions
|
||||
)
|
||||
llm.register_function(
|
||||
"list_allergies",
|
||||
intake.save_data,
|
||||
start_callback=intake.start_allergies)
|
||||
"list_allergies", intake.save_data, start_callback=intake.start_allergies
|
||||
)
|
||||
llm.register_function(
|
||||
"list_conditions",
|
||||
intake.save_data,
|
||||
start_callback=intake.start_conditions)
|
||||
"list_conditions", intake.save_data, start_callback=intake.start_conditions
|
||||
)
|
||||
llm.register_function(
|
||||
"list_visit_reasons",
|
||||
intake.save_data,
|
||||
start_callback=intake.start_visit_reasons)
|
||||
"list_visit_reasons", intake.save_data, start_callback=intake.start_visit_reasons
|
||||
)
|
||||
|
||||
fl = FrameLogger("LLM Output")
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport input
|
||||
user_context, # User responses
|
||||
llm, # LLM
|
||||
fl, # Frame logger
|
||||
tts, # TTS
|
||||
transport.output(), # Transport output
|
||||
assistant_context, # Assistant responses
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport input
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
fl, # Frame logger
|
||||
tts, # TTS
|
||||
transport.output(), # Transport output
|
||||
context_aggregator.assistant(), # Assistant responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=False))
|
||||
|
||||
@@ -351,5 +362,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
python-dotenv
|
||||
requests
|
||||
fastapi[all]
|
||||
uvicorn
|
||||
pipecat-ai[daily,openai,silero]
|
||||
pipecat-ai[daily,cartesia,openai,silero]
|
||||
|
||||
@@ -1,18 +1,21 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
|
||||
|
||||
|
||||
def configure():
|
||||
async def configure(aiohttp_session: aiohttp.ClientSession):
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
parser.add_argument(
|
||||
"-u",
|
||||
"--url",
|
||||
type=str,
|
||||
required=False,
|
||||
help="URL of the Daily room to join")
|
||||
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-k",
|
||||
"--apikey",
|
||||
@@ -28,31 +31,24 @@ def configure():
|
||||
|
||||
if not url:
|
||||
raise Exception(
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL.")
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
|
||||
)
|
||||
|
||||
if not key:
|
||||
raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
raise Exception(
|
||||
"No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
|
||||
)
|
||||
|
||||
daily_rest_helper = DailyRESTHelper(
|
||||
daily_api_key=key,
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
|
||||
# Create a meeting token for the given room with an expiration 1 hour in
|
||||
# the future.
|
||||
room_name: str = urllib.parse.urlparse(url).path[1:]
|
||||
expiration: float = time.time() + 60 * 60
|
||||
expiry_time: float = 60 * 60
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://api.daily.co/v1/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True,
|
||||
"exp": expiration}},
|
||||
)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
token = await daily_rest_helper.get_token(url, expiry_time)
|
||||
|
||||
return (url, token)
|
||||
|
||||
@@ -1,31 +1,52 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
import atexit
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse, RedirectResponse
|
||||
|
||||
from utils.daily_helpers import create_room as _create_room, get_token
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
|
||||
|
||||
MAX_BOTS_PER_ROOM = 1
|
||||
|
||||
# Bot sub-process dict for status reporting and concurrency control
|
||||
bot_procs = {}
|
||||
|
||||
daily_helpers = {}
|
||||
|
||||
|
||||
def cleanup():
|
||||
# Clean up function, just to be extra safe
|
||||
for proc in bot_procs.values():
|
||||
for entry in bot_procs.values():
|
||||
proc = entry[0]
|
||||
proc.terminate()
|
||||
proc.wait()
|
||||
|
||||
|
||||
atexit.register(cleanup)
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
aiohttp_session = aiohttp.ClientSession()
|
||||
daily_helpers["rest"] = DailyRESTHelper(
|
||||
daily_api_key=os.getenv("DAILY_API_KEY", ""),
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
yield
|
||||
await aiohttp_session.close()
|
||||
cleanup()
|
||||
|
||||
|
||||
app = FastAPI()
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
@@ -39,45 +60,42 @@ app.add_middleware(
|
||||
@app.get("/start")
|
||||
async def start_agent(request: Request):
|
||||
print(f"!!! Creating room")
|
||||
room_url, room_name = _create_room()
|
||||
print(f"!!! Room URL: {room_url}")
|
||||
room = await daily_helpers["rest"].create_room(DailyRoomParams())
|
||||
print(f"!!! Room URL: {room.url}")
|
||||
# Ensure the room property is present
|
||||
if not room_url:
|
||||
if not room.url:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Missing 'room' property in request data. Cannot start agent without a target room!")
|
||||
detail="Missing 'room' property in request data. Cannot start agent without a target room!",
|
||||
)
|
||||
|
||||
# Check if there is already an existing process running in this room
|
||||
num_bots_in_room = sum(
|
||||
1 for proc in bot_procs.values() if proc[1] == room_url and proc[0].poll() is None)
|
||||
1 for proc in bot_procs.values() if proc[1] == room.url and proc[0].poll() is None
|
||||
)
|
||||
if num_bots_in_room >= MAX_BOTS_PER_ROOM:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Max bot limited reach for room: {room_url}")
|
||||
raise HTTPException(status_code=500, detail=f"Max bot limited reach for room: {room.url}")
|
||||
|
||||
# Get the token for the room
|
||||
token = get_token(room_url)
|
||||
token = await daily_helpers["rest"].get_token(room.url)
|
||||
|
||||
if not token:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to get token for room: {room_url}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")
|
||||
|
||||
# Spawn a new agent, and join the user session
|
||||
# Note: this is mostly for demonstration purposes (refer to 'deployment' in README)
|
||||
try:
|
||||
proc = subprocess.Popen(
|
||||
[
|
||||
f"python3 -m bot -u {room_url} -t {token}"
|
||||
],
|
||||
[f"python3 -m bot -u {room.url} -t {token}"],
|
||||
shell=True,
|
||||
bufsize=1,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__))
|
||||
cwd=os.path.dirname(os.path.abspath(__file__)),
|
||||
)
|
||||
bot_procs[proc.pid] = (proc, room_url)
|
||||
bot_procs[proc.pid] = (proc, room.url)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
|
||||
return RedirectResponse(room_url)
|
||||
return RedirectResponse(room.url)
|
||||
|
||||
|
||||
@app.get("/status/{pid}")
|
||||
@@ -87,8 +105,7 @@ def get_status(pid: int):
|
||||
|
||||
# If the subprocess doesn't exist, return an error
|
||||
if not proc:
|
||||
raise HTTPException(
|
||||
status_code=404, detail=f"Bot with process id: {pid} not found")
|
||||
raise HTTPException(status_code=404, detail=f"Bot with process id: {pid} not found")
|
||||
|
||||
# Check the status of the subprocess
|
||||
if proc[0].poll() is None:
|
||||
@@ -105,14 +122,10 @@ if __name__ == "__main__":
|
||||
default_host = os.getenv("HOST", "0.0.0.0")
|
||||
default_port = int(os.getenv("FAST_API_PORT", "7860"))
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Daily Storyteller FastAPI server")
|
||||
parser.add_argument("--host", type=str,
|
||||
default=default_host, help="Host address")
|
||||
parser.add_argument("--port", type=int,
|
||||
default=default_port, help="Port number")
|
||||
parser.add_argument("--reload", action="store_true",
|
||||
help="Reload code on change")
|
||||
parser = argparse.ArgumentParser(description="Daily Storyteller FastAPI server")
|
||||
parser.add_argument("--host", type=str, default=default_host, help="Host address")
|
||||
parser.add_argument("--port", type=int, default=default_port, help="Port number")
|
||||
parser.add_argument("--reload", action="store_true", help="Reload code on change")
|
||||
|
||||
config = parser.parse_args()
|
||||
print(f"to join a test room, visit http://localhost:{config.port}/start")
|
||||
|
||||
@@ -1,109 +0,0 @@
|
||||
|
||||
import urllib.parse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
|
||||
daily_api_path = os.getenv("DAILY_API_URL") or "api.daily.co/v1"
|
||||
daily_api_key = os.getenv("DAILY_API_KEY")
|
||||
|
||||
|
||||
def create_room() -> tuple[str, str]:
|
||||
"""
|
||||
Helper function to create a Daily room.
|
||||
# See: https://docs.daily.co/reference/rest-api/rooms
|
||||
|
||||
Returns:
|
||||
tuple: A tuple containing the room URL and room name.
|
||||
|
||||
Raises:
|
||||
Exception: If the request to create the room fails or if the response does not contain the room URL or room name.
|
||||
"""
|
||||
room_props = {
|
||||
"exp": time.time() + 60 * 60, # 1 hour
|
||||
"enable_chat": True,
|
||||
"enable_emoji_reactions": True,
|
||||
"eject_at_room_exp": True,
|
||||
"enable_prejoin_ui": False, # Important for the bot to be able to join headlessly
|
||||
}
|
||||
res = requests.post(
|
||||
f"https://{daily_api_path}/rooms",
|
||||
headers={"Authorization": f"Bearer {daily_api_key}"},
|
||||
json={
|
||||
"properties": room_props
|
||||
},
|
||||
)
|
||||
if res.status_code != 200:
|
||||
raise Exception(f"Unable to create room: {res.text}")
|
||||
|
||||
data = res.json()
|
||||
room_url: str = data.get("url")
|
||||
room_name: str = data.get("name")
|
||||
if room_url is None or room_name is None:
|
||||
raise Exception("Missing room URL or room name in response")
|
||||
|
||||
return room_url, room_name
|
||||
|
||||
|
||||
def get_name_from_url(room_url: str) -> str:
|
||||
"""
|
||||
Extracts the name from a given room URL.
|
||||
|
||||
Args:
|
||||
room_url (str): The URL of the room.
|
||||
|
||||
Returns:
|
||||
str: The extracted name from the room URL.
|
||||
"""
|
||||
return urllib.parse.urlparse(room_url).path[1:]
|
||||
|
||||
|
||||
def get_token(room_url: str) -> str:
|
||||
"""
|
||||
Retrieves a meeting token for the specified Daily room URL.
|
||||
# See: https://docs.daily.co/reference/rest-api/meeting-tokens
|
||||
|
||||
Args:
|
||||
room_url (str): The URL of the Daily room.
|
||||
|
||||
Returns:
|
||||
str: The meeting token.
|
||||
|
||||
Raises:
|
||||
Exception: If no room URL is specified or if no Daily API key is specified.
|
||||
Exception: If there is an error creating the meeting token.
|
||||
"""
|
||||
if not room_url:
|
||||
raise Exception(
|
||||
"No Daily room specified. You must specify a Daily room in order a token to be generated.")
|
||||
|
||||
if not daily_api_key:
|
||||
raise Exception(
|
||||
"No Daily API key specified. set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
|
||||
expiration: float = time.time() + 60 * 60
|
||||
room_name = get_name_from_url(room_url)
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://{daily_api_path}/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {daily_api_key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True, # Owner tokens required for transcription
|
||||
"exp": expiration}},
|
||||
)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
|
||||
return token
|
||||
@@ -1,3 +1,9 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
@@ -8,19 +14,22 @@ from PIL import Image
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import LLMAssistantResponseAggregator, LLMUserResponseAggregator
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.frames.frames import (
|
||||
AudioRawFrame,
|
||||
ImageRawFrame,
|
||||
OutputImageRawFrame,
|
||||
SpriteFrame,
|
||||
Frame,
|
||||
LLMMessagesFrame,
|
||||
TTSStoppedFrame
|
||||
TTSAudioRawFrame,
|
||||
TTSStoppedFrame,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTranscriptionSettings, DailyTransport
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
@@ -28,6 +37,7 @@ from runner import configure
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -43,7 +53,7 @@ for i in range(1, 26):
|
||||
# Get the filename without the extension to use as the dictionary key
|
||||
# Open the image and convert it to bytes
|
||||
with Image.open(full_path) as img:
|
||||
sprites.append(ImageRawFrame(image=img.tobytes(), size=img.size, format=img.format))
|
||||
sprites.append(OutputImageRawFrame(image=img.tobytes(), size=img.size, format=img.format))
|
||||
|
||||
flipped = sprites[::-1]
|
||||
sprites.extend(flipped)
|
||||
@@ -66,7 +76,7 @@ class TalkingAnimation(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, AudioRawFrame):
|
||||
if isinstance(frame, TTSAudioRawFrame):
|
||||
if not self._is_talking:
|
||||
await self.push_frame(talking_frame)
|
||||
self._is_talking = True
|
||||
@@ -77,8 +87,10 @@ class TalkingAnimation(FrameProcessor):
|
||||
await self.push_frame(frame)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -99,17 +111,15 @@ async def main(room_url: str, token):
|
||||
# tier="nova",
|
||||
# model="2-general"
|
||||
# )
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
#
|
||||
# English
|
||||
#
|
||||
voice_id="pNInz6obpgDQGcFmaJgB",
|
||||
|
||||
#
|
||||
# Spanish
|
||||
#
|
||||
@@ -117,9 +127,7 @@ async def main(room_url: str, token):
|
||||
# voice_id="gD1IexrzCvsXPHUuT0s3",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -128,7 +136,6 @@ async def main(room_url: str, token):
|
||||
# English
|
||||
#
|
||||
"content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by introducing yourself.",
|
||||
|
||||
#
|
||||
# Spanish
|
||||
#
|
||||
@@ -141,15 +148,17 @@ async def main(room_url: str, token):
|
||||
|
||||
ta = TalkingAnimation()
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(),
|
||||
user_response,
|
||||
llm,
|
||||
tts,
|
||||
ta,
|
||||
transport.output(),
|
||||
assistant_response,
|
||||
])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
user_response,
|
||||
llm,
|
||||
tts,
|
||||
ta,
|
||||
transport.output(),
|
||||
assistant_response,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
await task.queue_frame(quiet_frame)
|
||||
@@ -165,5 +174,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
python-dotenv
|
||||
requests
|
||||
fastapi[all]
|
||||
uvicorn
|
||||
pipecat-ai[daily,openai,silero]
|
||||
pipecat-ai[daily,elevenlabs,openai,silero]
|
||||
|
||||
@@ -1,18 +1,21 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
|
||||
|
||||
|
||||
def configure():
|
||||
async def configure(aiohttp_session: aiohttp.ClientSession):
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
parser.add_argument(
|
||||
"-u",
|
||||
"--url",
|
||||
type=str,
|
||||
required=False,
|
||||
help="URL of the Daily room to join")
|
||||
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-k",
|
||||
"--apikey",
|
||||
@@ -28,31 +31,24 @@ def configure():
|
||||
|
||||
if not url:
|
||||
raise Exception(
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL.")
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
|
||||
)
|
||||
|
||||
if not key:
|
||||
raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
raise Exception(
|
||||
"No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
|
||||
)
|
||||
|
||||
daily_rest_helper = DailyRESTHelper(
|
||||
daily_api_key=key,
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
|
||||
# Create a meeting token for the given room with an expiration 1 hour in
|
||||
# the future.
|
||||
room_name: str = urllib.parse.urlparse(url).path[1:]
|
||||
expiration: float = time.time() + 60 * 60
|
||||
expiry_time: float = 60 * 60
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://api.daily.co/v1/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True,
|
||||
"exp": expiration}},
|
||||
)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
token = await daily_rest_helper.get_token(url, expiry_time)
|
||||
|
||||
return (url, token)
|
||||
|
||||
@@ -1,31 +1,52 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
import atexit
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse, RedirectResponse
|
||||
|
||||
from utils.daily_helpers import create_room as _create_room, get_token
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
|
||||
|
||||
MAX_BOTS_PER_ROOM = 1
|
||||
|
||||
# Bot sub-process dict for status reporting and concurrency control
|
||||
bot_procs = {}
|
||||
|
||||
daily_helpers = {}
|
||||
|
||||
|
||||
def cleanup():
|
||||
# Clean up function, just to be extra safe
|
||||
for proc in bot_procs.values():
|
||||
for entry in bot_procs.values():
|
||||
proc = entry[0]
|
||||
proc.terminate()
|
||||
proc.wait()
|
||||
|
||||
|
||||
atexit.register(cleanup)
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
aiohttp_session = aiohttp.ClientSession()
|
||||
daily_helpers["rest"] = DailyRESTHelper(
|
||||
daily_api_key=os.getenv("DAILY_API_KEY", ""),
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
yield
|
||||
await aiohttp_session.close()
|
||||
cleanup()
|
||||
|
||||
|
||||
app = FastAPI()
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
@@ -39,45 +60,42 @@ app.add_middleware(
|
||||
@app.get("/start")
|
||||
async def start_agent(request: Request):
|
||||
print(f"!!! Creating room")
|
||||
room_url, room_name = _create_room()
|
||||
print(f"!!! Room URL: {room_url}")
|
||||
room = await daily_helpers["rest"].create_room(DailyRoomParams())
|
||||
print(f"!!! Room URL: {room.url}")
|
||||
# Ensure the room property is present
|
||||
if not room_url:
|
||||
if not room.url:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Missing 'room' property in request data. Cannot start agent without a target room!")
|
||||
detail="Missing 'room' property in request data. Cannot start agent without a target room!",
|
||||
)
|
||||
|
||||
# Check if there is already an existing process running in this room
|
||||
num_bots_in_room = sum(
|
||||
1 for proc in bot_procs.values() if proc[1] == room_url and proc[0].poll() is None)
|
||||
1 for proc in bot_procs.values() if proc[1] == room.url and proc[0].poll() is None
|
||||
)
|
||||
if num_bots_in_room >= MAX_BOTS_PER_ROOM:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Max bot limited reach for room: {room_url}")
|
||||
raise HTTPException(status_code=500, detail=f"Max bot limited reach for room: {room.url}")
|
||||
|
||||
# Get the token for the room
|
||||
token = get_token(room_url)
|
||||
token = await daily_helpers["rest"].get_token(room.url)
|
||||
|
||||
if not token:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to get token for room: {room_url}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")
|
||||
|
||||
# Spawn a new agent, and join the user session
|
||||
# Note: this is mostly for demonstration purposes (refer to 'deployment' in README)
|
||||
try:
|
||||
proc = subprocess.Popen(
|
||||
[
|
||||
f"python3 -m bot -u {room_url} -t {token}"
|
||||
],
|
||||
[f"python3 -m bot -u {room.url} -t {token}"],
|
||||
shell=True,
|
||||
bufsize=1,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__))
|
||||
cwd=os.path.dirname(os.path.abspath(__file__)),
|
||||
)
|
||||
bot_procs[proc.pid] = (proc, room_url)
|
||||
bot_procs[proc.pid] = (proc, room.url)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
|
||||
return RedirectResponse(room_url)
|
||||
return RedirectResponse(room.url)
|
||||
|
||||
|
||||
@app.get("/status/{pid}")
|
||||
@@ -87,8 +105,7 @@ def get_status(pid: int):
|
||||
|
||||
# If the subprocess doesn't exist, return an error
|
||||
if not proc:
|
||||
raise HTTPException(
|
||||
status_code=404, detail=f"Bot with process id: {pid} not found")
|
||||
raise HTTPException(status_code=404, detail=f"Bot with process id: {pid} not found")
|
||||
|
||||
# Check the status of the subprocess
|
||||
if proc[0].poll() is None:
|
||||
@@ -105,14 +122,10 @@ if __name__ == "__main__":
|
||||
default_host = os.getenv("HOST", "0.0.0.0")
|
||||
default_port = int(os.getenv("FAST_API_PORT", "7860"))
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Daily Storyteller FastAPI server")
|
||||
parser.add_argument("--host", type=str,
|
||||
default=default_host, help="Host address")
|
||||
parser.add_argument("--port", type=int,
|
||||
default=default_port, help="Port number")
|
||||
parser.add_argument("--reload", action="store_true",
|
||||
help="Reload code on change")
|
||||
parser = argparse.ArgumentParser(description="Daily Storyteller FastAPI server")
|
||||
parser.add_argument("--host", type=str, default=default_host, help="Host address")
|
||||
parser.add_argument("--port", type=int, default=default_port, help="Port number")
|
||||
parser.add_argument("--reload", action="store_true", help="Reload code on change")
|
||||
|
||||
config = parser.parse_args()
|
||||
|
||||
|
||||
@@ -1,109 +0,0 @@
|
||||
|
||||
import urllib.parse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
|
||||
daily_api_path = os.getenv("DAILY_API_URL") or "api.daily.co/v1"
|
||||
daily_api_key = os.getenv("DAILY_API_KEY")
|
||||
|
||||
|
||||
def create_room() -> tuple[str, str]:
|
||||
"""
|
||||
Helper function to create a Daily room.
|
||||
# See: https://docs.daily.co/reference/rest-api/rooms
|
||||
|
||||
Returns:
|
||||
tuple: A tuple containing the room URL and room name.
|
||||
|
||||
Raises:
|
||||
Exception: If the request to create the room fails or if the response does not contain the room URL or room name.
|
||||
"""
|
||||
room_props = {
|
||||
"exp": time.time() + 60 * 60, # 1 hour
|
||||
"enable_chat": True,
|
||||
"enable_emoji_reactions": True,
|
||||
"eject_at_room_exp": True,
|
||||
"enable_prejoin_ui": False, # Important for the bot to be able to join headlessly
|
||||
}
|
||||
res = requests.post(
|
||||
f"https://{daily_api_path}/rooms",
|
||||
headers={"Authorization": f"Bearer {daily_api_key}"},
|
||||
json={
|
||||
"properties": room_props
|
||||
},
|
||||
)
|
||||
if res.status_code != 200:
|
||||
raise Exception(f"Unable to create room: {res.text}")
|
||||
|
||||
data = res.json()
|
||||
room_url: str = data.get("url")
|
||||
room_name: str = data.get("name")
|
||||
if room_url is None or room_name is None:
|
||||
raise Exception("Missing room URL or room name in response")
|
||||
|
||||
return room_url, room_name
|
||||
|
||||
|
||||
def get_name_from_url(room_url: str) -> str:
|
||||
"""
|
||||
Extracts the name from a given room URL.
|
||||
|
||||
Args:
|
||||
room_url (str): The URL of the room.
|
||||
|
||||
Returns:
|
||||
str: The extracted name from the room URL.
|
||||
"""
|
||||
return urllib.parse.urlparse(room_url).path[1:]
|
||||
|
||||
|
||||
def get_token(room_url: str) -> str:
|
||||
"""
|
||||
Retrieves a meeting token for the specified Daily room URL.
|
||||
# See: https://docs.daily.co/reference/rest-api/meeting-tokens
|
||||
|
||||
Args:
|
||||
room_url (str): The URL of the Daily room.
|
||||
|
||||
Returns:
|
||||
str: The meeting token.
|
||||
|
||||
Raises:
|
||||
Exception: If no room URL is specified or if no Daily API key is specified.
|
||||
Exception: If there is an error creating the meeting token.
|
||||
"""
|
||||
if not room_url:
|
||||
raise Exception(
|
||||
"No Daily room specified. You must specify a Daily room in order a token to be generated.")
|
||||
|
||||
if not daily_api_key:
|
||||
raise Exception(
|
||||
"No Daily API key specified. set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
|
||||
expiration: float = time.time() + 60 * 60
|
||||
room_name = get_name_from_url(room_url)
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://{daily_api_path}/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {daily_api_key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True, # Owner tokens required for transcription
|
||||
"exp": expiration}},
|
||||
)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
|
||||
return token
|
||||
6240
examples/storytelling-chatbot/frontend/package-lock.json
generated
Normal file
6240
examples/storytelling-chatbot/frontend/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,5 @@
|
||||
async_timeout
|
||||
fastapi
|
||||
uvicorn
|
||||
requests
|
||||
python-dotenv
|
||||
pipecat-ai[daily,openai,fal]
|
||||
pipecat-ai[daily,elevenlabs,openai,fal]
|
||||
|
||||
@@ -9,11 +9,18 @@ from pipecat.frames.frames import LLMMessagesFrame, StopTaskFrame, EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import LLMAssistantResponseAggregator, LLMUserResponseAggregator
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport, DailyTransportMessageFrame
|
||||
from pipecat.transports.services.daily import (
|
||||
DailyParams,
|
||||
DailyTransport,
|
||||
DailyTransportMessageFrame,
|
||||
)
|
||||
|
||||
from processors import StoryProcessor, StoryImageProcessor
|
||||
from prompts import LLM_BASE_PROMPT, LLM_INTRO_PROMPT, CUE_USER_TURN
|
||||
@@ -22,6 +29,7 @@ from utils.helpers import load_sounds, load_images
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -33,7 +41,6 @@ images = load_images(["book1.png", "book2.png"])
|
||||
|
||||
async def main(room_url, token=None):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
|
||||
# -------------- Transport --------------- #
|
||||
|
||||
transport = DailyTransport(
|
||||
@@ -47,29 +54,22 @@ async def main(room_url, token=None):
|
||||
camera_out_height=768,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
logger.debug("Transport created for room:" + room_url)
|
||||
|
||||
# -------------- Services --------------- #
|
||||
|
||||
llm_service = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o"
|
||||
)
|
||||
llm_service = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
tts_service = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
)
|
||||
|
||||
fal_service_params = FalImageGenService.InputParams(
|
||||
image_size={
|
||||
"width": 768,
|
||||
"height": 768
|
||||
}
|
||||
image_size={"width": 768, "height": 768}
|
||||
)
|
||||
|
||||
fal_service = FalImageGenService(
|
||||
@@ -111,12 +111,12 @@ async def main(room_url, token=None):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await intro_task.queue_frames(
|
||||
[
|
||||
images['book1'],
|
||||
images["book1"],
|
||||
LLMMessagesFrame([LLM_INTRO_PROMPT]),
|
||||
DailyTransportMessageFrame(CUE_USER_TURN),
|
||||
sounds["listening"],
|
||||
images['book2'],
|
||||
StopTaskFrame()
|
||||
images["book2"],
|
||||
StopTaskFrame(),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -126,16 +126,18 @@ async def main(room_url, token=None):
|
||||
|
||||
# The main story pipeline is used to continue the story based on user
|
||||
# input.
|
||||
main_pipeline = Pipeline([
|
||||
transport.input(),
|
||||
user_responses,
|
||||
llm_service,
|
||||
story_processor,
|
||||
image_processor,
|
||||
tts_service,
|
||||
transport.output(),
|
||||
llm_responses
|
||||
])
|
||||
main_pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
user_responses,
|
||||
llm_service,
|
||||
story_processor,
|
||||
image_processor,
|
||||
tts_service,
|
||||
transport.output(),
|
||||
llm_responses,
|
||||
]
|
||||
)
|
||||
|
||||
main_task = PipelineTask(main_pipeline)
|
||||
|
||||
@@ -151,6 +153,7 @@ async def main(room_url, token=None):
|
||||
|
||||
await runner.run(main_task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Daily Storyteller Bot")
|
||||
parser.add_argument("-u", type=str, help="Room URL")
|
||||
|
||||
@@ -1,31 +1,56 @@
|
||||
import os
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import subprocess
|
||||
import requests
|
||||
import os
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.responses import FileResponse, JSONResponse
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomObject, DailyRoomProperties, DailyRoomParams
|
||||
from pipecat.transports.services.helpers.daily_rest import (
|
||||
DailyRESTHelper,
|
||||
DailyRoomObject,
|
||||
DailyRoomProperties,
|
||||
DailyRoomParams,
|
||||
)
|
||||
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# ------------ Fast API Config ------------ #
|
||||
|
||||
MAX_SESSION_TIME = 5 * 60 # 5 minutes
|
||||
|
||||
daily_rest_helper = DailyRESTHelper(
|
||||
os.getenv("DAILY_API_KEY", ""),
|
||||
os.getenv("DAILY_API_URL", 'https://api.daily.co/v1'))
|
||||
daily_helpers = {}
|
||||
|
||||
|
||||
app = FastAPI()
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Application lifespan handler passed to FastAPI.

    Opens one aiohttp client session for the lifetime of the app and stores a
    DailyRESTHelper built on it under daily_helpers["rest"]. The session is
    closed when the lifespan context exits, including when an exception is
    thrown into the generator at the yield.
    """
    async with aiohttp.ClientSession() as aiohttp_session:
        daily_helpers["rest"] = DailyRESTHelper(
            daily_api_key=os.getenv("DAILY_API_KEY", ""),
            daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
            aiohttp_session=aiohttp_session,
        )
        yield
|
||||
|
||||
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
@@ -66,55 +91,50 @@ async def start_bot(request: Request) -> JSONResponse:
|
||||
room_url = os.getenv("DAILY_SAMPLE_ROOM_URL", "")
|
||||
|
||||
if not room_url:
|
||||
params = DailyRoomParams(
|
||||
properties=DailyRoomProperties()
|
||||
)
|
||||
params = DailyRoomParams(properties=DailyRoomProperties())
|
||||
try:
|
||||
room: DailyRoomObject = daily_rest_helper.create_room(params=params)
|
||||
room: DailyRoomObject = await daily_helpers["rest"].create_room(params=params)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Unable to provision room {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Unable to provision room {e}")
|
||||
else:
|
||||
# Check passed room URL exists, we should assume that it already has a sip set up
|
||||
try:
|
||||
room: DailyRoomObject = daily_rest_helper.get_room_from_url(room_url)
|
||||
room: DailyRoomObject = await daily_helpers["rest"].get_room_from_url(room_url)
|
||||
except Exception:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Room not found: {room_url}")
|
||||
raise HTTPException(status_code=500, detail=f"Room not found: {room_url}")
|
||||
|
||||
# Give the agent a token to join the session
|
||||
token = daily_rest_helper.get_token(room.url, MAX_SESSION_TIME)
|
||||
token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)
|
||||
|
||||
if not room or not token:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to get token for room: {room_url}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room_url}")
|
||||
|
||||
# Launch a new VM, or run as a shell process (not recommended)
|
||||
if os.getenv("RUN_AS_VM", False):
|
||||
try:
|
||||
virtualize_bot(room.url, token)
|
||||
await virtualize_bot(room.url, token)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to spawn VM: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to spawn VM: {e}")
|
||||
else:
|
||||
try:
|
||||
subprocess.Popen(
|
||||
[f"python3 -m bot -u {room.url} -t {token}"],
|
||||
shell=True,
|
||||
bufsize=1,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__)))
|
||||
cwd=os.path.dirname(os.path.abspath(__file__)),
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
|
||||
# Grab a token for the user to join with
|
||||
user_token = daily_rest_helper.get_token(room.url, MAX_SESSION_TIME)
|
||||
user_token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)
|
||||
|
||||
return JSONResponse({
|
||||
"room_url": room.url,
|
||||
"token": user_token,
|
||||
})
|
||||
return JSONResponse(
|
||||
{
|
||||
"room_url": room.url,
|
||||
"token": user_token,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@app.get("/{path_name:path}", response_class=FileResponse)
|
||||
@@ -136,7 +156,8 @@ async def catch_all(path_name: Optional[str] = ""):
|
||||
|
||||
# ------------ Virtualization ------------ #
|
||||
|
||||
def virtualize_bot(room_url: str, token: str):
|
||||
|
||||
async def virtualize_bot(room_url: str, token: str):
|
||||
"""
|
||||
This is an example of how to virtualize the bot using Fly.io
|
||||
You can adapt this method to use whichever cloud provider you prefer.
|
||||
@@ -144,67 +165,67 @@ def virtualize_bot(room_url: str, token: str):
|
||||
FLY_API_HOST = os.getenv("FLY_API_HOST", "https://api.machines.dev/v1")
|
||||
FLY_APP_NAME = os.getenv("FLY_APP_NAME", "storytelling-chatbot")
|
||||
FLY_API_KEY = os.getenv("FLY_API_KEY", "")
|
||||
FLY_HEADERS = {
|
||||
'Authorization': f"Bearer {FLY_API_KEY}",
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
FLY_HEADERS = {"Authorization": f"Bearer {FLY_API_KEY}", "Content-Type": "application/json"}
|
||||
|
||||
# Use the same image as the bot runner
|
||||
res = requests.get(f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines", headers=FLY_HEADERS)
|
||||
if res.status_code != 200:
|
||||
raise Exception(f"Unable to get machine info from Fly: {res.text}")
|
||||
image = res.json()[0]['config']['image']
|
||||
async with aiohttp.ClientSession() as session:
|
||||
# Use the same image as the bot runner
|
||||
async with session.get(
|
||||
f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines", headers=FLY_HEADERS
|
||||
) as r:
|
||||
if r.status != 200:
|
||||
text = await r.text()
|
||||
raise Exception(f"Unable to get machine info from Fly: {text}")
|
||||
|
||||
# Machine configuration
|
||||
cmd = f"python3 src/bot.py -u {room_url} -t {token}"
|
||||
cmd = cmd.split()
|
||||
worker_props = {
|
||||
"config": {
|
||||
"image": image,
|
||||
"auto_destroy": True,
|
||||
"init": {
|
||||
"cmd": cmd
|
||||
data = await r.json()
|
||||
image = data[0]["config"]["image"]
|
||||
|
||||
# Machine configuration
|
||||
cmd = f"python3 src/bot.py -u {room_url} -t {token}"
|
||||
cmd = cmd.split()
|
||||
worker_props = {
|
||||
"config": {
|
||||
"image": image,
|
||||
"auto_destroy": True,
|
||||
"init": {"cmd": cmd},
|
||||
"restart": {"policy": "no"},
|
||||
"guest": {"cpu_kind": "shared", "cpus": 1, "memory_mb": 512},
|
||||
},
|
||||
"restart": {
|
||||
"policy": "no"
|
||||
},
|
||||
"guest": {
|
||||
"cpu_kind": "shared",
|
||||
"cpus": 1,
|
||||
"memory_mb": 512
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
}
|
||||
# Spawn a new machine instance
|
||||
async with session.post(
|
||||
f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines", headers=FLY_HEADERS, json=worker_props
|
||||
) as r:
|
||||
if r.status != 200:
|
||||
text = await r.text()
|
||||
raise Exception(f"Problem starting a bot worker: {text}")
|
||||
|
||||
# Spawn a new machine instance
|
||||
res = requests.post(
|
||||
f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines",
|
||||
headers=FLY_HEADERS,
|
||||
json=worker_props)
|
||||
data = await r.json()
|
||||
# Wait for the machine to enter the started state
|
||||
vm_id = data["id"]
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(f"Problem starting a bot worker: {res.text}")
|
||||
async with session.get(
|
||||
f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines/{vm_id}/wait?state=started",
|
||||
headers=FLY_HEADERS,
|
||||
) as r:
|
||||
if r.status != 200:
|
||||
text = await r.text()
|
||||
raise Exception(f"Bot was unable to enter started state: {text}")
|
||||
|
||||
# Wait for the machine to enter the started state
|
||||
vm_id = res.json()['id']
|
||||
|
||||
res = requests.get(
|
||||
f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines/{vm_id}/wait?state=started",
|
||||
headers=FLY_HEADERS)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(f"Bot was unable to enter started state: {res.text}")
|
||||
|
||||
print(f"Machine joined room: {room_url}")
|
||||
print(f"Machine joined room: {room_url}")
|
||||
|
||||
|
||||
# ------------ Main ------------ #
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Check environment variables
|
||||
required_env_vars = ['OPENAI_API_KEY', 'DAILY_API_KEY',
|
||||
'FAL_KEY', 'ELEVENLABS_VOICE_ID', 'ELEVENLABS_API_KEY']
|
||||
required_env_vars = [
|
||||
"OPENAI_API_KEY",
|
||||
"DAILY_API_KEY",
|
||||
"FAL_KEY",
|
||||
"ELEVENLABS_VOICE_ID",
|
||||
"ELEVENLABS_API_KEY",
|
||||
]
|
||||
for env_var in required_env_vars:
|
||||
if env_var not in os.environ:
|
||||
raise Exception(f"Missing environment variable: {env_var}.")
|
||||
@@ -214,20 +235,11 @@ if __name__ == "__main__":
|
||||
default_host = os.getenv("HOST", "0.0.0.0")
|
||||
default_port = int(os.getenv("FAST_API_PORT", "7860"))
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Daily Storyteller FastAPI server")
|
||||
parser.add_argument("--host", type=str,
|
||||
default=default_host, help="Host address")
|
||||
parser.add_argument("--port", type=int,
|
||||
default=default_port, help="Port number")
|
||||
parser.add_argument("--reload", action="store_true",
|
||||
help="Reload code on change")
|
||||
parser = argparse.ArgumentParser(description="Daily Storyteller FastAPI server")
|
||||
parser.add_argument("--host", type=str, default=default_host, help="Host address")
|
||||
parser.add_argument("--port", type=int, default=default_port, help="Port number")
|
||||
parser.add_argument("--reload", action="store_true", help="Reload code on change")
|
||||
|
||||
config = parser.parse_args()
|
||||
|
||||
uvicorn.run(
|
||||
"bot_runner:app",
|
||||
host=config.host,
|
||||
port=config.port,
|
||||
reload=config.reload
|
||||
)
|
||||
uvicorn.run("bot_runner:app", host=config.host, port=config.port, reload=config.reload)
|
||||
|
||||
@@ -6,7 +6,8 @@ from pipecat.frames.frames import (
|
||||
Frame,
|
||||
LLMFullResponseEndFrame,
|
||||
TextFrame,
|
||||
UserStoppedSpeakingFrame)
|
||||
UserStoppedSpeakingFrame,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.transports.services.daily import DailyTransportMessageFrame
|
||||
|
||||
@@ -35,6 +36,7 @@ class StoryPromptFrame(TextFrame):
|
||||
|
||||
# ------------ Frame Processors ----------- #
|
||||
|
||||
|
||||
class StoryImageProcessor(FrameProcessor):
|
||||
"""
|
||||
Processor for image prompt frames that will be sent to the FAL service.
|
||||
@@ -113,7 +115,7 @@ class StoryProcessor(FrameProcessor):
|
||||
# Extract the image prompt from the text using regex
|
||||
image_prompt = re.search(r"<(.*?)>", self._text).group(1)
|
||||
# Remove the image prompt from the text
|
||||
self._text = re.sub(r"<.*?>", '', self._text, count=1)
|
||||
self._text = re.sub(r"<.*?>", "", self._text, count=1)
|
||||
# Process the image prompt frame
|
||||
await self.push_frame(StoryImageFrame(image_prompt))
|
||||
|
||||
@@ -124,8 +126,7 @@ class StoryProcessor(FrameProcessor):
|
||||
if re.search(r".*\[[bB]reak\].*", self._text):
|
||||
# Remove the [break] token from the text
|
||||
# so it isn't spoken out loud by the TTS
|
||||
self._text = re.sub(r'\[[bB]reak\]', '',
|
||||
self._text, flags=re.IGNORECASE)
|
||||
self._text = re.sub(r"\[[bB]reak\]", "", self._text, flags=re.IGNORECASE)
|
||||
self._text = self._text.replace("\n", " ")
|
||||
if len(self._text) > 2:
|
||||
# Append the sentence to the story
|
||||
|
||||
@@ -3,7 +3,7 @@ LLM_INTRO_PROMPT = {
|
||||
"content": "You are a creative storyteller who loves to tell whimsical, fantastical stories. \
|
||||
Your goal is to craft an engaging and fun story. \
|
||||
Start by asking the user what kind of story they'd like to hear. Don't provide any examples. \
|
||||
Keep your response to only a few sentences."
|
||||
Keep your response to only a few sentences.",
|
||||
}
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ LLM_BASE_PROMPT = {
|
||||
Responses should use the format: <...> story sentence [break] <...> story sentence [break] ... \
|
||||
After each response, ask me how I'd like the story to continue and wait for my input. \
|
||||
Please ensure your responses are less than 3-4 sentences long. \
|
||||
Please refrain from using any explicit language or content. Do not tell scary stories."
|
||||
Please refrain from using any explicit language or content. Do not tell scary stories.",
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,109 +0,0 @@
|
||||
|
||||
import urllib.parse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
|
||||
daily_api_path = os.getenv("DAILY_API_URL") or "api.daily.co/v1"
|
||||
daily_api_key = os.getenv("DAILY_API_KEY")
|
||||
|
||||
|
||||
def create_room(timeout: float = 10.0) -> tuple[str, str]:
    """
    Helper function to create a Daily room.
    # See: https://docs.daily.co/reference/rest-api/rooms

    The room expires one hour after creation and ejects participants at expiry.

    Args:
        timeout (float): Seconds to wait for the Daily API before giving up.

    Returns:
        tuple: A tuple containing the room URL and room name.

    Raises:
        Exception: If no Daily API key is specified, if the request to create the
            room fails, or if the response does not contain the room URL or room name.
    """
    # Fail early with a clear message, as get_token does, instead of sending
    # "Bearer None" to the API.
    if not daily_api_key:
        raise Exception(
            "No Daily API key specified. set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")

    room_props = {
        "exp": int(time.time()) + 60 * 60,  # 1 hour, as a whole-second Unix timestamp
        "enable_chat": True,
        "enable_emoji_reactions": True,
        "eject_at_room_exp": True,
        "enable_prejoin_ui": False,  # Important for the bot to be able to join headlessly
    }
    res = requests.post(
        f"https://{daily_api_path}/rooms",
        headers={"Authorization": f"Bearer {daily_api_key}"},
        json={"properties": room_props},
        # Without a timeout an unresponsive API would block the caller forever.
        timeout=timeout,
    )
    if res.status_code != 200:
        raise Exception(f"Unable to create room: {res.text}")

    data = res.json()
    room_url: str = data.get("url")
    room_name: str = data.get("name")
    if room_url is None or room_name is None:
        raise Exception("Missing room URL or room name in response")

    return room_url, room_name
|
||||
|
||||
|
||||
def get_name_from_url(room_url: str) -> str:
    """
    Return the room name embedded in a room URL.

    The name is the URL's path component with its first character (the
    leading "/") removed.

    Args:
        room_url (str): The URL of the room.

    Returns:
        str: The path of the URL without its first character.
    """
    parsed = urllib.parse.urlparse(room_url)
    url_path = parsed.path
    return url_path[1:]
|
||||
|
||||
|
||||
def get_token(room_url: str, timeout: float = 10.0) -> str:
    """
    Retrieves a meeting token for the specified Daily room URL.
    # See: https://docs.daily.co/reference/rest-api/meeting-tokens

    The token is an owner token that expires one hour after it is requested.

    Args:
        room_url (str): The URL of the Daily room.
        timeout (float): Seconds to wait for the Daily API before giving up.

    Returns:
        str: The meeting token.

    Raises:
        Exception: If no room URL is specified or if no Daily API key is specified.
        Exception: If there is an error creating the meeting token.
    """
    if not room_url:
        raise Exception(
            "No Daily room specified. You must specify a Daily room in order for a token to be generated.")

    if not daily_api_key:
        raise Exception(
            "No Daily API key specified. set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")

    # Send a whole-second Unix timestamp rather than a float.
    expiration: int = int(time.time()) + 60 * 60
    room_name = get_name_from_url(room_url)

    res: requests.Response = requests.post(
        f"https://{daily_api_path}/meeting-tokens",
        headers={"Authorization": f"Bearer {daily_api_key}"},
        json={
            "properties": {
                "room_name": room_name,
                "is_owner": True,  # Owner tokens required for transcription
                "exp": expiration,
            }
        },
        # Without a timeout an unresponsive API would block the caller forever.
        timeout=timeout,
    )

    if res.status_code != 200:
        raise Exception(
            f"Failed to create meeting token: {res.status_code} {res.text}")

    token: str = res.json()["token"]

    return token
|
||||
@@ -2,7 +2,7 @@ import os
|
||||
import wave
|
||||
from PIL import Image
|
||||
|
||||
from pipecat.frames.frames import AudioRawFrame, ImageRawFrame
|
||||
from pipecat.frames.frames import OutputAudioRawFrame, OutputImageRawFrame
|
||||
|
||||
script_dir = os.path.dirname(__file__)
|
||||
|
||||
@@ -16,7 +16,9 @@ def load_images(image_files):
|
||||
filename = os.path.splitext(os.path.basename(full_path))[0]
|
||||
# Open the image and convert it to bytes
|
||||
with Image.open(full_path) as img:
|
||||
images[filename] = ImageRawFrame(image=img.tobytes(), size=img.size, format=img.format)
|
||||
images[filename] = OutputImageRawFrame(
|
||||
image=img.tobytes(), size=img.size, format=img.format
|
||||
)
|
||||
return images
|
||||
|
||||
|
||||
@@ -30,8 +32,10 @@ def load_sounds(sound_files):
|
||||
filename = os.path.splitext(os.path.basename(full_path))[0]
|
||||
# Open the sound and convert it to bytes
|
||||
with wave.open(full_path) as audio_file:
|
||||
sounds[filename] = AudioRawFrame(audio=audio_file.readframes(-1),
|
||||
sample_rate=audio_file.getframerate(),
|
||||
num_channels=audio_file.getnchannels())
|
||||
sounds[filename] = OutputAudioRawFrame(
|
||||
audio=audio_file.readframes(-1),
|
||||
sample_rate=audio_file.getframerate(),
|
||||
num_channels=audio_file.getnchannels(),
|
||||
)
|
||||
|
||||
return sounds
|
||||
|
||||
13
examples/studypal/README.md
Normal file
13
examples/studypal/README.md
Normal file
@@ -0,0 +1,13 @@
|
||||
# studypal
|
||||
|
||||
### Have a conversation about any article on the web
|
||||
|
||||
studypal is a fast conversational AI built using [Daily](https://www.daily.co/) for real-time media transport and [Cartesia](https://cartesia.ai) for text-to-speech. Everything is orchestrated together (VAD -> STT -> LLM -> TTS) using [Pipecat](https://www.pipecat.ai/).
|
||||
|
||||
## Setup
|
||||
|
||||
1. Clone the repository
|
||||
2. Copy `env.example` to a `.env` file and add API keys
|
||||
3. Install the required packages: `pip install -r requirements.txt`
|
||||
4. Run `python3 studypal.py` from your command line.
|
||||
5. While the app is running, open the room URL you set in `DAILY_SAMPLE_ROOM_URL` (of the form `https://<yourdomain>.daily.co/<room_name>`) in your browser and talk to studypal!
|
||||
5
examples/studypal/env.example
Normal file
5
examples/studypal/env.example
Normal file
@@ -0,0 +1,5 @@
|
||||
DAILY_SAMPLE_ROOM_URL= # Follow instructions here and put your https://YOURDOMAIN.daily.co/YOURROOM (Instructions: https://docs.pipecat.ai/quickstart#preparing-your-environment)
|
||||
DAILY_API_KEY= # Create here: https://dashboard.daily.co/developers
|
||||
OPENAI_API_KEY= # Create here: https://platform.openai.com/docs/overview
|
||||
CARTESIA_API_KEY= # Create here: https://play.cartesia.ai/console
|
||||
CARTESIA_VOICE_ID= # Find here: https://play.cartesia.ai/
|
||||
5
examples/studypal/requirements.txt
Normal file
5
examples/studypal/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
beautifulsoup4==4.12.3
|
||||
pypdf==4.3.1
|
||||
tiktoken==0.7.0
|
||||
pipecat-ai[daily,cartesia,openai,silero]==0.0.40
|
||||
python-dotenv==1.0.1
|
||||
62
examples/studypal/runner.py
Normal file
62
examples/studypal/runner.py
Normal file
@@ -0,0 +1,62 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
|
||||
|
||||
|
||||
async def configure(aiohttp_session: aiohttp.ClientSession):
    """
    Resolve the Daily room URL and a meeting token for it.

    Thin wrapper around configure_with_args that discards the parsed
    command-line arguments and returns only (url, token).
    """
    room_url, room_token, _parsed_args = await configure_with_args(aiohttp_session)
    return (room_url, room_token)
|
||||
|
||||
|
||||
async def configure_with_args(
    aiohttp_session: aiohttp.ClientSession, parser: argparse.ArgumentParser | None = None
):
    """
    Resolve the Daily room URL and API key, then request a meeting token.

    The URL comes from -u/--url or DAILY_SAMPLE_ROOM_URL; the API key comes
    from -k/--apikey or DAILY_API_KEY. Command-line values take precedence.

    Args:
        aiohttp_session: Session handed to DailyRESTHelper for its requests.
        parser: Optional parser to extend with the -u and -k options; a new
            one is created when none is given.

    Returns:
        A (url, token, args) tuple, where args is the parsed argument namespace.

    Raises:
        Exception: If no room URL or no API key could be determined.
    """
    parser = parser or argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
    parser.add_argument(
        "-u", "--url", type=str, required=False, help="URL of the Daily room to join"
    )
    parser.add_argument(
        "-k",
        "--apikey",
        type=str,
        required=False,
        help="Daily API Key (needed to create an owner token for the room)",
    )

    # parse_known_args leaves unrecognised options alone instead of erroring.
    args, _unrecognised = parser.parse_known_args()

    room_url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL")
    api_key = args.apikey or os.getenv("DAILY_API_KEY")

    if not room_url:
        raise Exception(
            "No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
        )

    if not api_key:
        raise Exception(
            "No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
        )

    rest_helper = DailyRESTHelper(
        daily_api_key=api_key,
        daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
        aiohttp_session=aiohttp_session,
    )

    # The meeting token is requested with a one-hour expiry.
    one_hour: float = 60 * 60
    token = await rest_helper.get_token(room_url, one_hour)

    return (room_url, token, args)
|
||||
186
examples/studypal/studypal.py
Normal file
186
examples/studypal/studypal.py
Normal file
@@ -0,0 +1,186 @@
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import io
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from pypdf import PdfReader
|
||||
import tiktoken
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# Run this script directly from your command line.
|
||||
# This project was adapted from
|
||||
# https://github.com/pipecat-ai/pipecat/blob/main/examples/foundational/07d-interruptible-cartesia.py
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
# Count number of tokens used in model and truncate the content
|
||||
def truncate_content(content, model_name, max_tokens=10000):
    """
    Limit content to at most max_tokens tokens for the given model.

    The text is tokenized with the tiktoken encoding registered for
    model_name. If it fits within the budget it is returned unchanged;
    otherwise the first max_tokens tokens are decoded back to text.

    Args:
        content: The text to limit.
        model_name: Model name used to look up the tiktoken encoding.
        max_tokens: Maximum number of tokens to keep (default 10000).

    Returns:
        The original text, or the decoded prefix of max_tokens tokens.
    """
    encoding = tiktoken.encoding_for_model(model_name)
    tokens = encoding.encode(content)

    if len(tokens) <= max_tokens:
        return content
    return encoding.decode(tokens[:max_tokens])
|
||||
|
||||
|
||||
# Main function to extract content from url
|
||||
|
||||
|
||||
async def get_article_content(url: str, aiohttp_session: aiohttp.ClientSession):
    """
    Fetch the text of an article, choosing the extractor from the URL.

    URLs containing "arxiv.org" go to get_arxiv_content; every other URL
    goes to get_wikipedia_content.
    """
    extractor = get_arxiv_content if "arxiv.org" in url else get_wikipedia_content
    return await extractor(url, aiohttp_session)
|
||||
|
||||
|
||||
# Helper function to extract content from a Wikipedia URL. Any URL is
|
||||
# fetched, but text is only extracted from the `mw-parser-output` div, so
|
||||
# pages without that element produce the extraction-failure message.
|
||||
|
||||
|
||||
async def get_wikipedia_content(url: str, aiohttp_session: aiohttp.ClientSession):
    """
    Download a page and return the text of its "mw-parser-output" div.

    Returns a failure message string instead of raising when the download
    does not return HTTP 200 or when the div is not found.
    """
    async with aiohttp_session.get(url) as response:
        if response.status != 200:
            return "Failed to download Wikipedia article."
        html = await response.text()

    article_body = BeautifulSoup(html, "html.parser").find(
        "div", {"class": "mw-parser-output"}
    )
    if not article_body:
        return "Failed to extract Wikipedia article content."
    return article_body.get_text()
|
||||
|
||||
|
||||
# Helper function to extract content from arXiv url
|
||||
|
||||
|
||||
async def get_arxiv_content(url: str, aiohttp_session: aiohttp.ClientSession):
    """
    Download an arXiv paper as a PDF and return its extracted text.

    An abstract URL ("/abs/") is rewritten to the PDF URL ("/pdf/"), and a
    ".pdf" suffix is appended when missing.

    Returns:
        The concatenated text of all pages, or a failure message string when
        the download does not return HTTP 200.
    """
    if "/abs/" in url:
        url = url.replace("/abs/", "/pdf/")
    if not url.endswith(".pdf"):
        url += ".pdf"

    async with aiohttp_session.get(url) as response:
        if response.status != 200:
            return "Failed to download arXiv PDF."
        content = await response.read()

    # Parse after the response is released; the bytes are already in memory.
    pdf_reader = PdfReader(io.BytesIO(content))
    # join avoids rebuilding the accumulated string once per page.
    return "".join(page.extract_text() for page in pdf_reader.pages)
|
||||
|
||||
|
||||
# This is the main function that handles STT -> LLM -> TTS
|
||||
|
||||
|
||||
async def main():
    """
    Run the studypal bot end to end.

    Prompts for an article URL, downloads and truncates the article text,
    joins the configured Daily room, and runs a pipeline of
    transport input -> user aggregator -> LLM -> TTS -> transport output ->
    assistant aggregator until the runner returns.
    """
    url = input("Enter the URL of the article you would like to talk about: ")

    # One HTTP session is shared by the article download and room configuration.
    async with aiohttp.ClientSession() as session:
        article_content = await get_article_content(url, session)
        # Cap the article size before it is embedded in the system prompt.
        article_content = truncate_content(article_content, model_name="gpt-4o-mini")

        (room_url, token) = await configure(session)

        transport = DailyTransport(
            room_url,
            token,
            "studypal",
            DailyParams(
                # Same rate as the TTS sample_rate below — presumably they must
                # match; TODO confirm.
                audio_out_sample_rate=44100,
                audio_out_enabled=True,
                transcription_enabled=True,
                vad_enabled=True,
                vad_analyzer=SileroVADAnalyzer(),
            ),
        )

        tts = CartesiaTTSService(
            api_key=os.getenv("CARTESIA_API_KEY"),
            voice_id=os.getenv("CARTESIA_VOICE_ID", "4d2fd738-3b3d-4368-957a-bb4805275bd9"),
            # British Narration Lady: 4d2fd738-3b3d-4368-957a-bb4805275bd9
            params=CartesiaTTSService.InputParams(
                sample_rate=44100,
            ),
        )

        llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o-mini")

        # The system prompt carries the (truncated) article and the length rule.
        messages = [
            {
                "role": "system",
                "content": f"""You are an AI study partner. You have been given the following article content:

{article_content}

Your task is to help the user understand and learn from this article in 2 sentences. THESE RESPONSES SHOULD BE ONLY MAX 2 SENTENCES. THIS INSTRUCTION IS VERY IMPORTANT. RESPONSES SHOULDN'T BE LONG.
""",
            },
        ]

        # Both aggregators are given the same messages list object.
        tma_in = LLMUserResponseAggregator(messages)
        tma_out = LLMAssistantResponseAggregator(messages)

        pipeline = Pipeline(
            [
                transport.input(),
                tma_in,
                llm,
                tts,
                transport.output(),
                tma_out,
            ]
        )

        task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            # Start transcribing the first participant, then queue the
            # conversation (with an added greeting message) for the LLM.
            transport.capture_participant_transcription(participant["id"])
            messages.append(
                {
                    "role": "system",
                    "content": "Hello! I'm ready to discuss the article with you. What would you like to learn about?",
                }
            )
            await task.queue_frames([LLMMessagesFrame(messages)])

        runner = PipelineRunner()

        await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -1,5 +1,11 @@
|
||||
import asyncio
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
@@ -12,13 +18,19 @@ from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.azure import AzureTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTranscriptionSettings, DailyTransport, DailyTransportMessageFrame
|
||||
from pipecat.transports.services.daily import (
|
||||
DailyParams,
|
||||
DailyTranscriptionSettings,
|
||||
DailyTransport,
|
||||
DailyTransportMessageFrame,
|
||||
)
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
@@ -34,7 +46,6 @@ It also isn't saving what the user or bot says into the context object for use i
|
||||
# We need to use a custom service here to yield LLM frames without saving
|
||||
# any context
|
||||
class TranslationProcessor(FrameProcessor):
|
||||
|
||||
def __init__(self, language):
|
||||
super().__init__()
|
||||
self._language = language
|
||||
@@ -70,17 +81,16 @@ class TranslationSubtitles(FrameProcessor):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, TextFrame):
|
||||
message = {
|
||||
"language": self._language,
|
||||
"text": frame.text
|
||||
}
|
||||
message = {"language": self._language, "text": frame.text}
|
||||
await self.push_frame(DailyTransportMessageFrame(message))
|
||||
|
||||
await self.push_frame(frame)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -88,10 +98,8 @@ async def main(room_url: str, token):
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
transcription_settings=DailyTranscriptionSettings(extra={
|
||||
"interim_results": False
|
||||
})
|
||||
)
|
||||
transcription_settings=DailyTranscriptionSettings(extra={"interim_results": False}),
|
||||
),
|
||||
)
|
||||
|
||||
tts = AzureTTSService(
|
||||
@@ -100,26 +108,14 @@ async def main(room_url: str, token):
|
||||
voice="es-ES-AlvaroNeural",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o"
|
||||
)
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
sa = SentenceAggregator()
|
||||
tp = TranslationProcessor("Spanish")
|
||||
lfra = LLMFullResponseAggregator()
|
||||
ts = TranslationSubtitles("spanish")
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(),
|
||||
sa,
|
||||
tp,
|
||||
llm,
|
||||
lfra,
|
||||
ts,
|
||||
tts,
|
||||
transport.output()
|
||||
])
|
||||
pipeline = Pipeline([transport.input(), sa, tp, llm, lfra, ts, tts, transport.output()])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@@ -133,5 +129,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
python-dotenv
|
||||
requests
|
||||
fastapi[all]
|
||||
pipecat-ai[daily,openai,azure]
|
||||
|
||||
@@ -1,18 +1,22 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
import aiohttp
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
|
||||
|
||||
|
||||
def configure():
|
||||
async def configure(aiohttp_session: aiohttp.ClientSession):
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
parser.add_argument(
|
||||
"-u",
|
||||
"--url",
|
||||
type=str,
|
||||
required=False,
|
||||
help="URL of the Daily room to join")
|
||||
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-k",
|
||||
"--apikey",
|
||||
@@ -28,31 +32,24 @@ def configure():
|
||||
|
||||
if not url:
|
||||
raise Exception(
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL.")
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
|
||||
)
|
||||
|
||||
if not key:
|
||||
raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
raise Exception(
|
||||
"No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
|
||||
)
|
||||
|
||||
daily_rest_helper = DailyRESTHelper(
|
||||
daily_api_key=key,
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
|
||||
# Create a meeting token for the given room with an expiration 1 hour in
|
||||
# the future.
|
||||
room_name: str = urllib.parse.urlparse(url).path[1:]
|
||||
expiration: float = time.time() + 60 * 60
|
||||
expiry_time: float = 60 * 60
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://api.daily.co/v1/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True,
|
||||
"exp": expiration}},
|
||||
)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
token = await daily_rest_helper.get_token(url, expiry_time)
|
||||
|
||||
return (url, token)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user