Compare commits
442 Commits
v0.0.29
...
khk/togeth
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
90c64c3df6 | ||
|
|
c2bc64361a | ||
|
|
9bbb824248 | ||
|
|
7c342f7ba2 | ||
|
|
37e2388758 | ||
|
|
05f0492a8d | ||
|
|
c0ac5c6ae8 | ||
|
|
be923687fb | ||
|
|
5f32fb125d | ||
|
|
ae6fbb3146 | ||
|
|
864768635a | ||
|
|
d7c9679977 | ||
|
|
fedfc366f6 | ||
|
|
b3b39626e1 | ||
|
|
4e0ece17b6 | ||
|
|
fd3fdacdee | ||
|
|
a253606d50 | ||
|
|
568d9dc0a3 | ||
|
|
6629b853c5 | ||
|
|
3931cb3235 | ||
|
|
38cd86ad52 | ||
|
|
c0cdabf61d | ||
|
|
51270a96c5 | ||
|
|
84d72c0d5c | ||
|
|
79aca8169a | ||
|
|
b9d362bd62 | ||
|
|
87c4a1bee1 | ||
|
|
c979762b70 | ||
|
|
1d92fc3199 | ||
|
|
8ac7fb1a67 | ||
|
|
60c3d33def | ||
|
|
8a39d3f4eb | ||
|
|
e038767b6f | ||
|
|
0c46b3e481 | ||
|
|
d42f072ff5 | ||
|
|
9b6f29c24a | ||
|
|
873d5dc23f | ||
|
|
6d141fd47f | ||
|
|
c6f6cb2947 | ||
|
|
0eb189ce7f | ||
|
|
f4fd7b7028 | ||
|
|
21de8e0a35 | ||
|
|
6f55d494bd | ||
|
|
d216edc567 | ||
|
|
ec6063ecc4 | ||
|
|
40fe4ce6fb | ||
|
|
31d87a4048 | ||
|
|
ac8b171fa9 | ||
|
|
1f06d78213 | ||
|
|
28eba17df8 | ||
|
|
dfc2e62339 | ||
|
|
80c89a39c9 | ||
|
|
9d1c16e996 | ||
|
|
86604c2353 | ||
|
|
8f31a02938 | ||
|
|
47d375309d | ||
|
|
980265ca97 | ||
|
|
90479fff95 | ||
|
|
1ce1fcb0ce | ||
|
|
1a662376fc | ||
|
|
1d24f926ec | ||
|
|
4f2c37c940 | ||
|
|
042115a6bb | ||
|
|
c9f1469b41 | ||
|
|
54c9f604c9 | ||
|
|
56fbcd6562 | ||
|
|
e6b0500568 | ||
|
|
41038b6673 | ||
|
|
26d03f26c9 | ||
|
|
f3a4e54996 | ||
|
|
925e80bb20 | ||
|
|
9bda09b1a8 | ||
|
|
ef0d0531fa | ||
|
|
6520f20ffe | ||
|
|
ebc4e0924b | ||
|
|
9e7c0e6033 | ||
|
|
cf5720f316 | ||
|
|
655b468269 | ||
|
|
17f8c93e44 | ||
|
|
5b4061b0d5 | ||
|
|
6ce0227e98 | ||
|
|
a583a28850 | ||
|
|
32daf65adc | ||
|
|
e22c80610e | ||
|
|
374f1e7e01 | ||
|
|
d2dfa93bf1 | ||
|
|
fa8c6712c6 | ||
|
|
4c2b84cb4d | ||
|
|
b57c9d569b | ||
|
|
f0e50ba000 | ||
|
|
4a6638f749 | ||
|
|
31577252f3 | ||
|
|
5d71c50080 | ||
|
|
981269d594 | ||
|
|
848db985fc | ||
|
|
d5d8e31447 | ||
|
|
66670a2370 | ||
|
|
5637f349c6 | ||
|
|
93248e1d00 | ||
|
|
187769357f | ||
|
|
5be6422cc8 | ||
|
|
8670b2d994 | ||
|
|
0bc6db428d | ||
|
|
67d565930e | ||
|
|
b2a7ff6fd3 | ||
|
|
425a730d7c | ||
|
|
84c5709722 | ||
|
|
94deec01c9 | ||
|
|
6e0dd4a779 | ||
|
|
14bde340dd | ||
|
|
253765c611 | ||
|
|
2b26d7182f | ||
|
|
61ac83e2d9 | ||
|
|
d5c7b28cad | ||
|
|
959580a708 | ||
|
|
3a5cd17ea3 | ||
|
|
b78981bb9d | ||
|
|
a6d90b0a00 | ||
|
|
67016492f2 | ||
|
|
2c38089527 | ||
|
|
48f68ba6dc | ||
|
|
574df4ba3d | ||
|
|
49ca16d125 | ||
|
|
87525b085e | ||
|
|
6b53c6add3 | ||
|
|
29ca1b7855 | ||
|
|
a42d0c9907 | ||
|
|
8bc6ceaa3d | ||
|
|
0b8a1ab5d1 | ||
|
|
358c287db2 | ||
|
|
2e68453655 | ||
|
|
89b8a9de7d | ||
|
|
c4c2058df9 | ||
|
|
0d85c0085f | ||
|
|
6fa8a8f84f | ||
|
|
a97775bff3 | ||
|
|
32640e054d | ||
|
|
aa42da5658 | ||
|
|
900a94a825 | ||
|
|
c37552de70 | ||
|
|
916b37926c | ||
|
|
2b76c3c15a | ||
|
|
cedd7dde18 | ||
|
|
d088608d8e | ||
|
|
06ee29bb8b | ||
|
|
d255e954d6 | ||
|
|
6a7ab6b8ac | ||
|
|
45b18cc0b1 | ||
|
|
0479431f0a | ||
|
|
ec58dbd791 | ||
|
|
91de68aab3 | ||
|
|
85efc30145 | ||
|
|
0032594f21 | ||
|
|
829fdc5679 | ||
|
|
22e176e329 | ||
|
|
826a70a137 | ||
|
|
dd0ea674af | ||
|
|
a4761b8921 | ||
|
|
3958bb7903 | ||
|
|
83a037a7ce | ||
|
|
a3eb8337a6 | ||
|
|
541072f8e0 | ||
|
|
881248cbd6 | ||
|
|
d4979f5e64 | ||
|
|
4133cd03bb | ||
|
|
9f07c3ca27 | ||
|
|
b20bacb9ed | ||
|
|
97cfbfee1d | ||
|
|
fa7c941792 | ||
|
|
4738879f32 | ||
|
|
d5d88f756a | ||
|
|
65b136bf15 | ||
|
|
bee0b238e4 | ||
|
|
c891168ffb | ||
|
|
6376c2f6aa | ||
|
|
4d9b7cdd61 | ||
|
|
8263d1dd6f | ||
|
|
faf41c0b36 | ||
|
|
27a09c0b2c | ||
|
|
3db7f6a284 | ||
|
|
3bfeb5b5ef | ||
|
|
62a7a555b5 | ||
|
|
d60e99a043 | ||
|
|
77723b34c7 | ||
|
|
c466d34a06 | ||
|
|
f816897833 | ||
|
|
c1e8a5e522 | ||
|
|
76aca32f2e | ||
|
|
7e31b2a795 | ||
|
|
028e38a86b | ||
|
|
8cf7649855 | ||
|
|
64f5119b08 | ||
|
|
4d606aefb3 | ||
|
|
4bafdaa04d | ||
|
|
5afe1abf82 | ||
|
|
f066d50b98 | ||
|
|
91103e21cc | ||
|
|
f44dabcd65 | ||
|
|
0fd2fca231 | ||
|
|
5bb64098e7 | ||
|
|
3fc85e75e0 | ||
|
|
3f61ea16b7 | ||
|
|
4b393092b5 | ||
|
|
b583f5162b | ||
|
|
060a22f395 | ||
|
|
d3e85355f1 | ||
|
|
83e730b768 | ||
|
|
5fcc96446c | ||
|
|
ad88925154 | ||
|
|
0a6ddbf15c | ||
|
|
08e0722d97 | ||
|
|
05d4fba551 | ||
|
|
f41c2b3c9f | ||
|
|
69f64899fe | ||
|
|
33f0865430 | ||
|
|
ad5b9202ab | ||
|
|
1676693091 | ||
|
|
0852b50b8f | ||
|
|
eb998aa502 | ||
|
|
6dab0e9de7 | ||
|
|
95ff1d141c | ||
|
|
87bc8a9da6 | ||
|
|
087fe9a537 | ||
|
|
c1170260b5 | ||
|
|
65cdf50774 | ||
|
|
9233bb490c | ||
|
|
43932220f7 | ||
|
|
cea4d1894e | ||
|
|
80baa0358d | ||
|
|
5d73db53a0 | ||
|
|
302ea90dce | ||
|
|
37b04ed283 | ||
|
|
be6995cfdf | ||
|
|
dfbc11300c | ||
|
|
82d539d174 | ||
|
|
6e00f31014 | ||
|
|
a46ac3cc92 | ||
|
|
6fbf98d8e2 | ||
|
|
f094c42728 | ||
|
|
13827e1282 | ||
|
|
32170b47d9 | ||
|
|
09c05354c2 | ||
|
|
b0b1475563 | ||
|
|
b85dd7283a | ||
|
|
846ae765e5 | ||
|
|
4c629e538e | ||
|
|
f6e22bb3b9 | ||
|
|
46a048d7f6 | ||
|
|
bd9f4eea06 | ||
|
|
0a672e61e2 | ||
|
|
29a8530221 | ||
|
|
3e738642a7 | ||
|
|
f551f55f03 | ||
|
|
9f012c8002 | ||
|
|
0a69a9e5ef | ||
|
|
194790183a | ||
|
|
2227721173 | ||
|
|
77a53da5f5 | ||
|
|
ab63ff275d | ||
|
|
e5363f65f0 | ||
|
|
ffc157de65 | ||
|
|
f9fdadb4c0 | ||
|
|
4efccb79f2 | ||
|
|
337968199a | ||
|
|
37027f68cb | ||
|
|
d1b62c5495 | ||
|
|
355fe01cb7 | ||
|
|
9d050a16c7 | ||
|
|
fa53c67606 | ||
|
|
5006376fe6 | ||
|
|
2204b8e205 | ||
|
|
270007b17c | ||
|
|
568eb2ef4c | ||
|
|
73ca9184a8 | ||
|
|
5e8e11e16e | ||
|
|
029bbc16f2 | ||
|
|
9e3d87e4f6 | ||
|
|
f1410a1127 | ||
|
|
2b980d16c3 | ||
|
|
b2b97aafb8 | ||
|
|
da2082b025 | ||
|
|
327ea9d547 | ||
|
|
b23db4a202 | ||
|
|
d1a36004ab | ||
|
|
6071920c45 | ||
|
|
5f539e1fba | ||
|
|
8e1539c360 | ||
|
|
065cfb2aca | ||
|
|
3147534e86 | ||
|
|
be5603bf16 | ||
|
|
b9b0bcdcbd | ||
|
|
5bcece56f3 | ||
|
|
d67faef88c | ||
|
|
8f6db5e905 | ||
|
|
82e93a0560 | ||
|
|
a9a82c083b | ||
|
|
974d9c33ed | ||
|
|
c1957ab694 | ||
|
|
b20a10a4bc | ||
|
|
be14ce465d | ||
|
|
d1ca0c5614 | ||
|
|
535514f506 | ||
|
|
933b63cf13 | ||
|
|
d7c3e380a5 | ||
|
|
c5298f78cb | ||
|
|
4f8f7b8d1d | ||
|
|
d7d46919ac | ||
|
|
e5d73d2e2e | ||
|
|
b145e8ec90 | ||
|
|
97ff4a1fb8 | ||
|
|
5018a552c1 | ||
|
|
7f9fd9ffce | ||
|
|
ddd0ca6a8f | ||
|
|
06f817c7e3 | ||
|
|
df4c3e56c4 | ||
|
|
9d5c2b9656 | ||
|
|
7ce59c5e2e | ||
|
|
1c9631fc78 | ||
|
|
efbe7297f7 | ||
|
|
1b45946a61 | ||
|
|
cbf5a6362c | ||
|
|
583b96c341 | ||
|
|
fc0920504d | ||
|
|
abd65a93b2 | ||
|
|
c3244fdd7a | ||
|
|
e8f58938b0 | ||
|
|
602b4f34b1 | ||
|
|
0399c84dfa | ||
|
|
fd5d879bf5 | ||
|
|
8dff460307 | ||
|
|
cce1ddb183 | ||
|
|
8691d14289 | ||
|
|
dd402da9e5 | ||
|
|
2fd04248f1 | ||
|
|
0ac42006f8 | ||
|
|
66e331248d | ||
|
|
4be3e8c87d | ||
|
|
dac033fe61 | ||
|
|
d302cbb114 | ||
|
|
e3b407db28 | ||
|
|
4ef623f09e | ||
|
|
253530a63d | ||
|
|
4f38d989f5 | ||
|
|
84074e90ee | ||
|
|
38aee7d8f2 | ||
|
|
64198313c6 | ||
|
|
d61b6c301c | ||
|
|
83d1931266 | ||
|
|
c31f2ab285 | ||
|
|
0ddc5721b4 | ||
|
|
98bd183bc4 | ||
|
|
aaa154524c | ||
|
|
beced68337 | ||
|
|
94823ab952 | ||
|
|
0b6a19802f | ||
|
|
c4a2d2197c | ||
|
|
269d06aa15 | ||
|
|
dfef1f2c54 | ||
|
|
b62beaba0b | ||
|
|
adf414e40f | ||
|
|
dc64e57f63 | ||
|
|
d3e410b2ac | ||
|
|
c544b2474b | ||
|
|
18243de358 | ||
|
|
6625895d1f | ||
|
|
f9ecce739e | ||
|
|
0075dd8386 | ||
|
|
eef1cde816 | ||
|
|
8d867c30c6 | ||
|
|
42c668b7ae | ||
|
|
b62227b4ae | ||
|
|
25ef0cb87b | ||
|
|
e195941aa5 | ||
|
|
e09eef1dd7 | ||
|
|
7c13663a4e | ||
|
|
5753869e5e | ||
|
|
ba878a19f4 | ||
|
|
55a9de78cd | ||
|
|
ff51fc9091 | ||
|
|
a4f857ee34 | ||
|
|
3250d74bef | ||
|
|
c086160239 | ||
|
|
6cdccaff53 | ||
|
|
a9ab8de25d | ||
|
|
2a29cb18a5 | ||
|
|
4193a4f415 | ||
|
|
0226ec450a | ||
|
|
020b8ebb35 | ||
|
|
1170b30c1b | ||
|
|
0004d4a906 | ||
|
|
cb27e86266 | ||
|
|
77a3b2ea5c | ||
|
|
099e65f3b6 | ||
|
|
befb8db120 | ||
|
|
9992d826b1 | ||
|
|
18604e1a39 | ||
|
|
312c569182 | ||
|
|
b43e0ed130 | ||
|
|
289debea34 | ||
|
|
ccd6af7016 | ||
|
|
effc69e4e4 | ||
|
|
c7a0d0db64 | ||
|
|
50d69a1ca4 | ||
|
|
8a6b8fe70a | ||
|
|
c4e53aea71 | ||
|
|
ad5125e93f | ||
|
|
8d92cbac93 | ||
|
|
0225443ec8 | ||
|
|
71e1d0a334 | ||
|
|
83f69e02fd | ||
|
|
e1b2da1ff0 | ||
|
|
5eb1b90a4b | ||
|
|
9c4ee74b91 | ||
|
|
f65f566829 | ||
|
|
c8ad3123b7 | ||
|
|
8cefce28cf | ||
|
|
a834d26885 | ||
|
|
810e3cd551 | ||
|
|
f258fa96cd | ||
|
|
757ec61f14 | ||
|
|
2c933f43d8 | ||
|
|
cc5bfa8af8 | ||
|
|
de9f3e55f1 | ||
|
|
ed0c986218 | ||
|
|
72c27215b6 | ||
|
|
c23b14f768 | ||
|
|
81282f9c4d | ||
|
|
2b324f6f81 | ||
|
|
049f110344 | ||
|
|
448a0307a8 | ||
|
|
7390e42f5c | ||
|
|
ee880d229f | ||
|
|
9cd07d81f8 | ||
|
|
b453d089c3 | ||
|
|
7410fe1d1e | ||
|
|
6323a77431 | ||
|
|
0aedaa8553 | ||
|
|
6554479d39 | ||
|
|
ce2ebd3198 | ||
|
|
13ea1efc96 | ||
|
|
ef380321cf | ||
|
|
294b037730 |
7
.github/workflows/publish_test.yaml
vendored
7
.github/workflows/publish_test.yaml
vendored
@@ -1,10 +1,6 @@
|
||||
name: publish-test
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
on: workflow_dispatch
|
||||
|
||||
jobs:
|
||||
build:
|
||||
@@ -14,7 +10,6 @@ jobs:
|
||||
- name: Checkout repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ github.event.inputs.gitref }}
|
||||
fetch-tags: true
|
||||
fetch-depth: 100
|
||||
- name: Set up Python
|
||||
|
||||
11
.github/workflows/tests.yaml
vendored
11
.github/workflows/tests.yaml
vendored
@@ -25,14 +25,7 @@ jobs:
|
||||
id: setup_python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.10'
|
||||
- name: Cache virtual environment
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
# We are hashing requirements-dev.txt and requirements-extra.txt which
|
||||
# contain all dependencies needed to run the tests and examples.
|
||||
key: venv-${{ runner.os }}-${{ steps.setup_python.outputs.python-version}}-${{ hashFiles('linux-py3.10-requirements.txt') }}-${{ hashFiles('dev-requirements.txt') }}
|
||||
path: .venv
|
||||
python-version: "3.10"
|
||||
- name: Install system packages
|
||||
run: sudo apt-get install -y portaudio19-dev
|
||||
- name: Setup virtual environment
|
||||
@@ -42,7 +35,7 @@ jobs:
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r linux-py3.10-requirements.txt -r dev-requirements.txt
|
||||
pip install -r dev-requirements.txt
|
||||
- name: Test with pytest
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
|
||||
417
CHANGELOG.md
417
CHANGELOG.md
@@ -5,6 +5,423 @@ All notable changes to **pipecat** will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
|
||||
- Added new `LmntTTSService` text-to-speech service.
|
||||
(see https://www.lmnt.com/)
|
||||
|
||||
- Added `TTSModelUpdateFrame`, `TTSLanguageUpdateFrame`, `STTModelUpdateFrame`,
|
||||
and `STTLanguageUpdateFrame` frames to allow you to switch models, language
|
||||
and voices in TTS and STT services.
|
||||
|
||||
- Added new `transcriptions.Language` enum.
|
||||
|
||||
### Changed
|
||||
|
||||
- `DailyTransport.on_joined` event now returns the full session data instead of
|
||||
just the participant.
|
||||
|
||||
- `CartesiaTTSService` is now a subclass of `TTSService`.
|
||||
|
||||
- `DeepgramSTTService` is now a subclass of `STTService`.
|
||||
|
||||
- `WhisperSTTService` is now a subclass of `SegmentedSTTService`. A
|
||||
`SegmentedSTTService` is a `STTService` where the provided audio is given in a
|
||||
big chunk (i.e. from when the user starts speaking until the user stops
|
||||
speaking) instead of a continuous stream.
|
||||
|
||||
## [0.0.41] - 2024-08-22
|
||||
|
||||
### Added
|
||||
|
||||
- Added `LivekitFrameSerializer` audio frame serializer.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fix `FastAPIWebsocketOutputTransport` variable name clash with subclass.
|
||||
|
||||
- Fix an `AnthropicLLMService` issue with empty arguments in function calling.
|
||||
|
||||
### Other
|
||||
|
||||
- Fixed `studypal` example errors.
|
||||
|
||||
## [0.0.40] - 2024-08-20
|
||||
|
||||
### Added
|
||||
|
||||
- VAD parameters can now be dynamically updated using the
|
||||
`VADParamsUpdateFrame`.
|
||||
|
||||
- `ErrorFrame` now has a `fatal` field to indicate the bot should exit if a
|
||||
fatal error is pushed upstream (false by default). A new `FatalErrorFrame`
|
||||
that sets this flag to true has been added.
|
||||
|
||||
- `AnthropicLLMService` now supports function calling and initial support for
|
||||
prompt caching.
|
||||
(see https://www.anthropic.com/news/prompt-caching)
|
||||
|
||||
- `ElevenLabsTTSService` can now specify ElevenLabs input parameters such as
|
||||
`output_format`.
|
||||
|
||||
- `TwilioFrameSerializer` can now specify Twilio's and Pipecat's desired sample
|
||||
rates to use.
|
||||
|
||||
- Added new `on_participant_updated` event to `DailyTransport`.
|
||||
|
||||
- Added `DailyRESTHelper.delete_room_by_name()` and
|
||||
`DailyRESTHelper.delete_room_by_url()`.
|
||||
|
||||
- Added LLM and TTS usage metrics. Those are enabled when
|
||||
`PipelineParams.enable_usage_metrics` is True.
|
||||
|
||||
- `AudioRawFrame`s are now pushed downstream from the base output
|
||||
transport. This allows capturing the exact words the bot says by adding an STT
|
||||
service at the end of the pipeline.
|
||||
|
||||
- Added new `GStreamerPipelineSource`. This processor can generate image or
|
||||
audio frames from a GStreamer pipeline (e.g. reading an MP4 file, an RTP
|
||||
stream or anything supported by GStreamer).
|
||||
|
||||
- Added `TransportParams.audio_out_is_live`. This flag is False by default and
|
||||
it is useful to indicate we should not synchronize audio with sporadic images.
|
||||
|
||||
- Added new `BotStartedSpeakingFrame` and `BotStoppedSpeakingFrame` control
|
||||
frames. These frames are pushed upstream and they should wrap
|
||||
`BotSpeakingFrame`.
|
||||
|
||||
- Transports now allow you to register event handlers without decorators.
|
||||
|
||||
### Changed
|
||||
|
||||
- Support RTVI message protocol 0.1. This includes new messages, support for
|
||||
message responses, support for actions, configuration, webhooks and a bunch
|
||||
of new cool stuff.
|
||||
(see https://docs.rtvi.ai/)
|
||||
|
||||
- `SileroVAD` dependency is now imported via pip's `silero-vad` package.
|
||||
|
||||
- `ElevenLabsTTSService` now uses `eleven_turbo_v2_5` model by default.
|
||||
|
||||
- `BotSpeakingFrame` is now a control frame.
|
||||
|
||||
- `StartFrame` is now a control frame similar to `EndFrame`.
|
||||
|
||||
- `DeepgramTTSService` is now more customizable. You can adjust the encoding and
|
||||
sample rate.
|
||||
|
||||
### Fixed
|
||||
|
||||
- `TTSStartFrame` and `TTSStopFrame` are now sent when TTS really starts and
|
||||
stops. This allows for knowing when the bot starts and stops speaking even
|
||||
with asynchronous services (like Cartesia).
|
||||
|
||||
- Fixed `AzureSTTService` transcription frame timestamps.
|
||||
|
||||
- Fixed an issue with `DailyRESTHelper.create_room()` expirations which would
|
||||
cause this function to stop working after the initial expiration elapsed.
|
||||
|
||||
- Improved `EndFrame` and `CancelFrame` handling. `EndFrame` should end things
|
||||
gracefully while a `CancelFrame` should cancel all running tasks as soon as
|
||||
possible.
|
||||
|
||||
- Fixed an issue in `AIService` that would cause a yielded `None` value to be
|
||||
processed.
|
||||
|
||||
- RTVI's `bot-ready` message is now sent when the RTVI pipeline is ready and
|
||||
a first participant joins.
|
||||
|
||||
- Fixed a `BaseInputTransport` issue that was causing incoming system frames to
|
||||
be queued instead of being pushed immediately.
|
||||
|
||||
- Fixed a `BaseInputTransport` issue that was causing start/stop interruptions
|
||||
incoming frames to not cancel tasks and be processed properly.
|
||||
|
||||
### Other
|
||||
|
||||
- Added `studypal` example (from the Cartesia folks!).
|
||||
|
||||
- Most examples now use Cartesia.
|
||||
|
||||
- Added examples `foundational/19a-tools-anthropic.py`,
|
||||
`foundational/19b-tools-video-anthropic.py` and
|
||||
`foundational/19a-tools-togetherai.py`.
|
||||
|
||||
- Added examples `foundational/18-gstreamer-filesrc.py` and
|
||||
`foundational/18a-gstreamer-videotestsrc.py` that show how to use
|
||||
`GStreamerPipelineSource`
|
||||
|
||||
- Remove `requests` library usage.
|
||||
|
||||
- Cleanup examples and use `DailyRESTHelper`.
|
||||
|
||||
## [0.0.39] - 2024-07-23
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed a regression introduced in 0.0.38 that would cause Daily transcription
|
||||
to stop the Pipeline.
|
||||
|
||||
## [0.0.38] - 2024-07-23
|
||||
|
||||
### Added
|
||||
|
||||
- Added `force_reload`, `skip_validation` and `trust_repo` to `SileroVAD` and
|
||||
`SileroVADAnalyzer`. This allows caching and various GitHub repo validations.
|
||||
|
||||
- Added `send_initial_empty_metrics` flag to `PipelineParams` to request
|
||||
initial empty metrics (zero values). True by default.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed initial metrics format. It was using the wrong keys name/time instead of
|
||||
processor/value.
|
||||
|
||||
- STT services should be using ISO 8601 time format for transcription frames.
|
||||
|
||||
- Fixed an issue that would cause Daily transport to show a stop transcription
|
||||
error when actually none occurred.
|
||||
|
||||
## [0.0.37] - 2024-07-22
|
||||
|
||||
### Added
|
||||
|
||||
- Added `RTVIProcessor` which implements the RTVI-AI standard.
|
||||
See https://github.com/rtvi-ai
|
||||
|
||||
- Added `BotInterruptionFrame` which allows interrupting the bot while talking.
|
||||
|
||||
- Added `LLMMessagesAppendFrame` which allows appending messages to the current
|
||||
LLM context.
|
||||
|
||||
- Added `LLMMessagesUpdateFrame` which allows changing the LLM context for the
|
||||
one provided in this new frame.
|
||||
|
||||
- Added `LLMModelUpdateFrame` which allows updating the LLM model.
|
||||
|
||||
- Added `TTSSpeakFrame` which causes the bot to say some text. This text will not
|
||||
be part of the LLM context.
|
||||
|
||||
- Added `TTSVoiceUpdateFrame` which allows updating the TTS voice.
|
||||
|
||||
### Removed
|
||||
|
||||
- We remove the `LLMResponseStartFrame` and `LLMResponseEndFrame` frames. These
|
||||
were added in the past to properly handle interruptions for the
|
||||
`LLMAssistantContextAggregator`. But the `LLMContextAggregator` is now based
|
||||
on `LLMResponseAggregator` which handles interruptions properly by just
|
||||
processing the `StartInterruptionFrame`, so there's no need for these extra
|
||||
frames any more.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue with `StatelessTextTransformer` where it was pushing a string
|
||||
instead of a `TextFrame`.
|
||||
|
||||
- `TTSService` end of sentence detection has been improved. It now works with
|
||||
acronyms, numbers, hours and others.
|
||||
|
||||
- Fixed an issue in `TTSService` that would not properly flush the current
|
||||
aggregated sentence if an `LLMFullResponseEndFrame` was found.
|
||||
|
||||
### Performance
|
||||
|
||||
- `CartesiaTTSService` now uses websockets which improves speed. It also
|
||||
leverages the new Cartesia contexts which maintains generated audio prosody
|
||||
when multiple inputs are sent, therefore improving audio quality a lot.
|
||||
|
||||
## [0.0.36] - 2024-07-02
|
||||
|
||||
### Added
|
||||
|
||||
- Added `GladiaSTTService`.
|
||||
See https://docs.gladia.io/chapters/speech-to-text-api/pages/live-speech-recognition
|
||||
|
||||
- Added `XTTSService`. This is a local Text-To-Speech service.
|
||||
See https://github.com/coqui-ai/TTS
|
||||
|
||||
- Added `UserIdleProcessor`. This processor can be used to wait for any
|
||||
interaction with the user. If the user doesn't say anything within a given
|
||||
timeout a provided callback is called.
|
||||
|
||||
- Added `IdleFrameProcessor`. This processor can be used to wait for frames
|
||||
within a given timeout. If no frame is received within the timeout a provided
|
||||
callback is called.
|
||||
|
||||
- Added new frame `BotSpeakingFrame`. This frame will be continuously pushed
|
||||
upstream while the bot is talking.
|
||||
|
||||
- It is now possible to specify a Silero VAD version when using `SileroVADAnalyzer`
|
||||
or `SileroVAD`.
|
||||
|
||||
- Added `AsyncFrameProcessor` and `AsyncAIService`. Some services like
|
||||
`DeepgramSTTService` need to process things asynchronously. For example, audio
|
||||
is sent to Deepgram but transcriptions are not returned immediately. In these
|
||||
cases we still require all frames (except system frames) to be pushed
|
||||
downstream from a single task. That's what `AsyncFrameProcessor` is for. It
|
||||
creates a task and all frames should be pushed from that task. So, whenever a
|
||||
new Deepgram transcription is ready that transcription will also be pushed
|
||||
from this internal task.
|
||||
|
||||
- The `MetricsFrame` now includes processing metrics if metrics are enabled. The
|
||||
processing metrics indicate the time a processor needs to generate all its
|
||||
output. Note that not all processors generate this kind of metrics.
|
||||
|
||||
### Changed
|
||||
|
||||
- `WhisperSTTService` model can now also be a string.
|
||||
|
||||
- Added missing * keyword separators in services.
|
||||
|
||||
### Fixed
|
||||
|
||||
- `WebsocketServerTransport` doesn't try to send frames anymore if the serializer
|
||||
returns `None`.
|
||||
|
||||
- Fixed an issue where exceptions that occurred inside frame processors were
|
||||
being swallowed and not displayed.
|
||||
|
||||
- Fixed an issue in `FastAPIWebsocketTransport` where it would still try to send
|
||||
data to the websocket after being closed.
|
||||
|
||||
### Other
|
||||
|
||||
- Added Fly.io deployment example in `examples/deployment/flyio-example`.
|
||||
|
||||
- Added new `17-detect-user-idle.py` example that shows how to use the new
|
||||
`UserIdleProcessor`.
|
||||
|
||||
## [0.0.35] - 2024-06-28
|
||||
|
||||
### Changed
|
||||
|
||||
- `FastAPIWebsocketParams` now require a serializer.
|
||||
|
||||
- `TwilioFrameSerializer` now requires a `streamSid`.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Silero VAD number of frames needs to be 512 for 16000 sample rate or 256 for
|
||||
8000 sample rate.
|
||||
|
||||
## [0.0.34] - 2024-06-25
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue with asynchronous STT services (Deepgram and Azure) that could
|
||||
cause interruptions to ignore transcriptions.
|
||||
|
||||
- Fixed an issue introduced in 0.0.33 that would cause the LLM to generate
|
||||
shorter output.
|
||||
|
||||
## [0.0.33] - 2024-06-25
|
||||
|
||||
### Changed
|
||||
|
||||
- Upgraded to Cartesia's new Python library 1.0.0. `CartesiaTTSService` now
|
||||
expects a voice ID instead of a voice name (you can get the voice ID from
|
||||
Cartesia's playground). You can also specify the audio `sample_rate` and
|
||||
`encoding` instead of the previous `output_format`.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue with asynchronous STT services (Deepgram and Azure) that could
|
||||
cause static audio issues and interruptions to not work properly when dealing
|
||||
with multiple LLM sentences.
|
||||
|
||||
- Fixed an issue that could mix new LLM responses with previous ones when
|
||||
handling interruptions.
|
||||
|
||||
- Fixed a Daily transport blocking situation that occurred while reading audio
|
||||
frames after a participant left the room. Needs daily-python >= 0.10.1.
|
||||
|
||||
## [0.0.32] - 2024-06-22
|
||||
|
||||
### Added
|
||||
|
||||
- Allow specifying a `DeepgramSTTService` url which allows using on-prem
|
||||
Deepgram.
|
||||
|
||||
- Added new `FastAPIWebsocketTransport`. This is a new websocket transport that
|
||||
can be integrated with FastAPI websockets.
|
||||
|
||||
- Added new `TwilioFrameSerializer`. This is a new serializer that knows how to
|
||||
serialize and deserialize audio frames from Twilio.
|
||||
|
||||
- Added Daily transport event: `on_dialout_answered`. See
|
||||
https://reference-python.daily.co/api_reference.html#daily.EventHandler
|
||||
|
||||
- Added new `AzureSTTService`. This allows you to use Azure Speech-To-Text.
|
||||
|
||||
### Performance
|
||||
|
||||
- Convert `BaseInputTransport` and `BaseOutputTransport` to fully use asyncio
|
||||
and remove the use of threads.
|
||||
|
||||
### Other
|
||||
|
||||
- Added `twilio-chatbot`. This is an example that shows how to integrate Twilio
|
||||
phone numbers with a Pipecat bot.
|
||||
|
||||
- Updated `07f-interruptible-azure.py` to use `AzureLLMService`,
|
||||
`AzureSTTService` and `AzureTTSService`.
|
||||
|
||||
## [0.0.31] - 2024-06-13
|
||||
|
||||
### Performance
|
||||
|
||||
- Break long audio frames into 20ms chunks instead of 10ms.
|
||||
|
||||
## [0.0.30] - 2024-06-13
|
||||
|
||||
### Added
|
||||
|
||||
- Added `report_only_initial_ttfb` to `PipelineParams`. This will make it so
|
||||
only the initial TTFB metrics after the user stops talking are reported.
|
||||
|
||||
- Added `OpenPipeLLMService`. This service will let you run OpenAI through
|
||||
OpenPipe's SDK.
|
||||
|
||||
- Allow specifying frame processors' name through a new `name` constructor
|
||||
argument.
|
||||
|
||||
- Added `DeepgramSTTService`. This service has an ongoing websocket
|
||||
connection. To handle this, it subclasses `AIService` instead of
|
||||
`STTService`. The output of this service will be pushed from the same task,
|
||||
except system frames like `StartFrame`, `CancelFrame` or
|
||||
`StartInterruptionFrame`.
|
||||
|
||||
### Changed
|
||||
|
||||
- `FrameSerializer.deserialize()` can now return `None` in case it is not
|
||||
possible to deserialize the given data.
|
||||
|
||||
- `daily_rest.DailyRoomProperties` now allows extra unknown parameters.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue where `DailyRoomProperties.exp` always had the same old
|
||||
timestamp unless set by the user.
|
||||
|
||||
- Fixed a couple of issues with `WebsocketServerTransport`. It needed to use
|
||||
`push_audio_frame()` and also VAD was not working properly.
|
||||
|
||||
- Fixed an issue that would cause LLM aggregator to fail with small
|
||||
`VADParams.stop_secs` values.
|
||||
|
||||
- Fixed an issue where `BaseOutputTransport` would send longer audio frames
|
||||
preventing interruptions.
|
||||
|
||||
### Other
|
||||
|
||||
- Added new `07h-interruptible-openpipe.py` example. This example shows how to
|
||||
use OpenPipe to run OpenAI LLMs and get the logs stored in OpenPipe.
|
||||
|
||||
- Added new `dialin-chatbot` example. This example shows how to call the bot
|
||||
using a phone number.
|
||||
|
||||
## [0.0.29] - 2024-06-07
|
||||
|
||||
### Added
|
||||
|
||||
34
README.md
34
README.md
@@ -4,8 +4,7 @@
|
||||
|
||||
# Pipecat
|
||||
|
||||
[](https://pypi.org/project/pipecat-ai) [](https://discord.gg/pipecat)
|
||||
[](https://pypi.org/project/pipecat-ai) [](https://discord.gg/pipecat) <a href="https://app.commanddash.io/agent/github_pipecat-ai_pipecat"><img src="https://img.shields.io/badge/AI-Code%20Agent-EB9FDA"></a>
|
||||
|
||||
`pipecat` is a framework for building voice (and multimodal) conversational agents. Things like personal coaches, meeting assistants, [story-telling toys for kids](https://storytelling-chatbot.fly.dev/), customer support bots, [intake flows](https://www.youtube.com/watch?v=lDevgsp9vn0), and snarky social companions.
|
||||
|
||||
@@ -39,7 +38,7 @@ pip install "pipecat-ai[option,...]"
|
||||
|
||||
Your project may or may not need these, so they're made available as optional requirements. Here is a list:
|
||||
|
||||
- **AI services**: `anthropic`, `azure`, `deepgram`, `google`, `fal`, `moondream`, `openai`, `playht`, `silero`, `whisper`
|
||||
- **AI services**: `anthropic`, `azure`, `deepgram`, `gladia`, `google`, `fal`, `lmnt`, `moondream`, `openai`, `openpipe`, `playht`, `silero`, `whisper`, `xtts`
|
||||
- **Transports**: `local`, `websocket`, `daily`
|
||||
|
||||
## Code examples
|
||||
@@ -49,7 +48,7 @@ Your project may or may not need these, so they're made available as optional re
|
||||
|
||||
## A simple voice agent running locally
|
||||
|
||||
Here is a very basic Pipecat bot that greets a user when they join a real-time session. We'll use [Daily](https://daily.co) for real-time media transport, and [ElevenLabs](https://elevenlabs.io/) for text-to-speech.
|
||||
Here is a very basic Pipecat bot that greets a user when they join a real-time session. We'll use [Daily](https://daily.co) for real-time media transport, and [Cartesia](https://cartesia.ai/) for text-to-speech.
|
||||
|
||||
```python
|
||||
#app.py
|
||||
@@ -61,7 +60,7 @@ from pipecat.frames.frames import EndFrame, TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
async def main():
|
||||
@@ -70,14 +69,13 @@ async def main():
|
||||
transport = DailyTransport(
|
||||
room_url=...,
|
||||
token=...,
|
||||
"Bot Name",
|
||||
DailyParams(audio_out_enabled=True))
|
||||
bot_name="Bot Name",
|
||||
params=DailyParams(audio_out_enabled=True))
|
||||
|
||||
# Use Eleven Labs for Text-to-Speech
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=...,
|
||||
voice_id=...,
|
||||
# Use Cartesia for Text-to-Speech
|
||||
tts = CartesiaTTSService(
|
||||
api_key=...,
|
||||
voice_id=...
|
||||
)
|
||||
|
||||
# Simple pipeline that will process text to speech and output the result
|
||||
@@ -94,7 +92,7 @@ async def main():
|
||||
@transport.event_handler("on_participant_joined")
|
||||
async def on_new_participant_joined(transport, participant):
|
||||
participant_name = participant["info"]["userName"] or ''
|
||||
# Queue a TextFrame that will get spoken by the TTS service (Eleven Labs)
|
||||
# Queue a TextFrame that will get spoken by the TTS service (Cartesia)
|
||||
await task.queue_frames([TextFrame(f"Hello there, {participant_name}!"), EndFrame()])
|
||||
|
||||
# Run the pipeline task
|
||||
@@ -125,7 +123,7 @@ Sign up [here](https://dashboard.daily.co/u/signup) and [create a room](https://
|
||||
|
||||
Voice Activity Detection — very important for knowing when a user has finished speaking to your bot. If you are not using press-to-talk, and want Pipecat to detect when the user has finished talking, VAD is an essential component for a natural feeling conversation.
|
||||
|
||||
Pipecast makes use of WebRTC VAD by default when using a WebRTC transport layer. Optionally, you can use Silero VAD for improved accuracy at the cost of higher CPU usage.
|
||||
Pipecat makes use of WebRTC VAD by default when using a WebRTC transport layer. Optionally, you can use Silero VAD for improved accuracy at the cost of higher CPU usage.
|
||||
|
||||
```shell
|
||||
pip install pipecat-ai[silero]
|
||||
@@ -146,20 +144,20 @@ source venv/bin/activate
|
||||
From the root of this repo, run the following:
|
||||
|
||||
```shell
|
||||
pip install -r dev-requirements.txt -r {env}-requirements.txt
|
||||
pip install -r dev-requirements.txt
|
||||
python -m build
|
||||
```
|
||||
|
||||
This builds the package. To use the package locally (eg to run sample files), run
|
||||
This builds the package. To use the package locally (e.g. to run sample files), run
|
||||
|
||||
```shell
|
||||
pip install --editable .
|
||||
pip install --editable ".[option,...]"
|
||||
```
|
||||
|
||||
If you want to use this package from another directory, you can run:
|
||||
|
||||
```shell
|
||||
pip install path_to_this_repo
|
||||
pip install "path_to_this_repo[option,...]"
|
||||
```
|
||||
|
||||
### Running tests
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
autopep8~=2.1.0
|
||||
autopep8~=2.3.1
|
||||
build~=1.2.1
|
||||
grpcio-tools~=1.62.2
|
||||
pip-tools~=7.4.1
|
||||
pytest~=8.2.0
|
||||
setuptools~=69.5.1
|
||||
pyright~=1.1.376
|
||||
pytest~=8.3.2
|
||||
setuptools~=72.2.0
|
||||
setuptools_scm~=8.1.0
|
||||
|
||||
@@ -27,9 +27,19 @@ FAL_KEY=...
|
||||
# Fireworks
|
||||
FIREWORKS_API_KEY=...
|
||||
|
||||
# Gladia
|
||||
GLADIA_API_KEY=...
|
||||
|
||||
# LMNT
|
||||
LMNT_API_KEY=...
|
||||
LMNT_VOICE_ID=...
|
||||
|
||||
# PlayHT
|
||||
PLAY_HT_USER_ID=...
|
||||
PLAY_HT_API_KEY=...
|
||||
|
||||
# OpenAI
|
||||
OPENAI_API_KEY=...
|
||||
|
||||
# OpenPipe
|
||||
OPENPIPE_API_KEY=...
|
||||
|
||||
@@ -32,13 +32,16 @@ Next, follow the steps in the README for each demo.
|
||||
|
||||
## Projects:
|
||||
|
||||
| Project | Description | Services |
|
||||
| -------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------- |
|
||||
| [Simple Chatbot](simple-chatbot) | Basic voice-driven conversational bot. A good starting point for learning the flow of the framework. | Deepgram, OpenAI, Daily, Daily Prebuilt UI |
|
||||
| [Storytelling Chatbot](storytelling-chatbot) | Stitches together multiple third-party services to create a collaborative storytime experience. | Deepgram, ElevenLabs, Open AI, Fal, Daily, Custom UI |
|
||||
| [Translation Chatbot](translation-chatbot) | Listens for user speech, then translates that speech to Spanish and speaks the translation back. Demonstrates multi-participant use-cases. | Deepgram, Azure, OpenAI, Daily, Daily Prebuilt UI |
|
||||
| [Moondream Chatbot](moondream-chatbot) | Demonstrates how to add vision capabilities to GPT4. **Note: works best with a GPU** | Deepgram, OpenAI, Moondream, Daily, Daily Prebuilt UI |
|
||||
| Function-calling Chatbot (TBC) | A chatbot that can call functions in response to user input | Deepgram, OpenAI, Fireworks, Daily, Daily Prebuilt UI |
|
||||
| Project | Description | Services |
|
||||
|----------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------|
|
||||
| [Simple Chatbot](simple-chatbot) | Basic voice-driven conversational bot. A good starting point for learning the flow of the framework. | Deepgram, ElevenLabs, OpenAI, Daily, Daily Prebuilt UI |
|
||||
| [Storytelling Chatbot](storytelling-chatbot) | Stitches together multiple third-party services to create a collaborative storytime experience. | Deepgram, ElevenLabs, OpenAI, Fal, Daily, Custom UI |
|
||||
| [Translation Chatbot](translation-chatbot) | Listens for user speech, then translates that speech to Spanish and speaks the translation back. Demonstrates multi-participant use-cases. | Deepgram, Azure, OpenAI, Daily, Daily Prebuilt UI |
|
||||
| [Moondream Chatbot](moondream-chatbot) | Demonstrates how to add vision capabilities to GPT4. **Note: works best with a GPU** | Deepgram, ElevenLabs, OpenAI, Moondream, Daily, Daily Prebuilt UI |
|
||||
| [Patient intake](patient-intake) | A chatbot that can call functions in response to user input. | Deepgram, ElevenLabs, OpenAI, Daily, Daily Prebuilt UI |
|
||||
| [Dialin Chatbot](dialin-chatbot) | A chatbot that connects to an incoming phone call from Daily or Twilio. | Deepgram, ElevenLabs, OpenAI, Daily, Twilio |
|
||||
| [Twilio Chatbot](twilio-chatbot) | A chatbot that connects to an incoming phone call from Twilio. | Deepgram, ElevenLabs, OpenAI, Daily, Twilio |
|
||||
| [studypal](studypal) | A chatbot to have a conversation about any article on the web | |
|
||||
|
||||
> [!IMPORTANT]
|
||||
> These example projects use Daily as a WebRTC transport and can be joined using their hosted Prebuilt UI.
|
||||
|
||||
13
examples/deployment/flyio-example/Dockerfile
Normal file
13
examples/deployment/flyio-example/Dockerfile
Normal file
@@ -0,0 +1,13 @@
|
||||
FROM python:3.11-bullseye
|
||||
|
||||
# Open port 7860 for http service
|
||||
ENV FAST_API_PORT=7860
|
||||
EXPOSE 7860
|
||||
|
||||
# Install Python dependencies
|
||||
COPY *.py .
|
||||
COPY ./requirements.txt requirements.txt
|
||||
RUN pip3 install --no-cache-dir --upgrade -r requirements.txt
|
||||
|
||||
# Start the FastAPI server
|
||||
CMD python3 bot_runner.py --port ${FAST_API_PORT}
|
||||
39
examples/deployment/flyio-example/README.md
Normal file
39
examples/deployment/flyio-example/README.md
Normal file
@@ -0,0 +1,39 @@
|
||||
# Fly.io deployment example
|
||||
|
||||
This project modifies the `bot_runner.py` server to launch a new machine for each user session. This is the recommended approach for production, as opposed to running shell processes, which will quickly exhaust your deployment's system resources under load.
|
||||
|
||||
For this example, we are using Daily as a WebRTC transport and provisioning a new room and token for each session. You can use another transport, such as WebSockets, by modifying the `bot.py` and `bot_runner.py` files accordingly.
|
||||
|
||||
## Setting up your fly.io deployment
|
||||
|
||||
### Create your fly.toml file
|
||||
|
||||
You can copy the `example-fly.toml` as a reference. Be sure to change the app name to something unique.
|
||||
|
||||
### Create your .env file
|
||||
|
||||
Copy the base `env.example` to `.env` and enter the necessary API keys.
|
||||
|
||||
`FLY_APP_NAME` should match that in the `fly.toml` file.
|
||||
|
||||
### Launch a new fly.io project
|
||||
|
||||
`fly launch` or `fly launch --org your-org-name`
|
||||
|
||||
### Set the necessary app secrets from your .env
|
||||
|
||||
Note: you can do this manually via the fly.io dashboard under the "secrets" sub-section of your deployment (e.g. "https://fly.io/apps/fly-app-name/secrets") or run the following terminal command:
|
||||
|
||||
`cat .env | tr '\n' ' ' | xargs flyctl secrets set`
|
||||
|
||||
### Deploy your machine
|
||||
|
||||
`fly deploy`
|
||||
|
||||
## Connecting to your bot
|
||||
|
||||
Send a POST request to your running fly.io instance:
|
||||
|
||||
`curl --location --request POST 'https://YOUR_FLY_APP_NAME/start_bot'`
|
||||
|
||||
This request will wait until the machine enters the `started` state before returning a room URL and token to join.
|
||||
0
examples/deployment/flyio-example/__init__.py
Normal file
0
examples/deployment/flyio-example/__init__.py
Normal file
103
examples/deployment/flyio-example/bot.py
Normal file
103
examples/deployment/flyio-example/bot.py
Normal file
@@ -0,0 +1,103 @@
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import LLMAssistantResponseAggregator, LLMUserResponseAggregator
|
||||
from pipecat.frames.frames import LLMMessagesFrame, EndFrame
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
daily_api_key = os.getenv("DAILY_API_KEY", "")
|
||||
daily_api_url = os.getenv("DAILY_API_URL", "https://api.daily.co/v1")
|
||||
|
||||
|
||||
async def main(room_url: str, token: str):
    """Run the voice chatbot in a Daily room until the pipeline ends.

    The pipeline is: Daily input -> user message aggregator -> OpenAI LLM ->
    ElevenLabs TTS -> Daily output -> assistant message aggregator.

    Args:
        room_url: URL of the Daily room to join.
        token: Daily meeting token used to join the room.
    """
    async with aiohttp.ClientSession() as session:
        transport_params = DailyParams(
            api_url=daily_api_url,
            api_key=daily_api_key,
            audio_in_enabled=True,
            audio_out_enabled=True,
            camera_out_enabled=False,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
            transcription_enabled=True,
        )
        transport = DailyTransport(room_url, token, "Chatbot", transport_params)

        tts = ElevenLabsTTSService(
            aiohttp_session=session,
            api_key=os.getenv("ELEVENLABS_API_KEY", ""),
            voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
        )

        llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

        # Conversation history; the same list is handed to both aggregators
        # and to the initial LLMMessagesFrame.
        system_message = {
            "role": "system",
            "content": "You are Chatbot, a friendly, helpful robot. Your output will be converted to audio so don't include special characters other than '!' or '?' in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by saying hello.",
        }
        messages = [system_message]

        user_aggregator = LLMUserResponseAggregator(messages)
        assistant_aggregator = LLMAssistantResponseAggregator(messages)

        stages = [
            transport.input(),
            user_aggregator,
            llm,
            tts,
            transport.output(),
            assistant_aggregator,
        ]
        task = PipelineTask(Pipeline(stages), PipelineParams(allow_interruptions=True))

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            # Start transcribing the participant, then queue the message
            # history (currently just the system prompt) for the LLM.
            transport.capture_participant_transcription(participant["id"])
            await task.queue_frames([LLMMessagesFrame(messages)])

        @transport.event_handler("on_participant_left")
        async def on_participant_left(transport, participant, reason):
            # Any participant leaving ends the session.
            await task.queue_frame(EndFrame())

        @transport.event_handler("on_call_state_updated")
        async def on_call_state_updated(transport, state):
            # Only the "left" call state ends the pipeline.
            if state == "left":
                await task.queue_frame(EndFrame())

        await PipelineRunner().run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: the runner starts this script with the room
    # URL and token for the session.
    parser = argparse.ArgumentParser(description="Pipecat Bot")
    # Both values are passed straight to main(); require them so a missing
    # argument is reported by argparse instead of reaching main() as None.
    parser.add_argument("-u", type=str, required=True, help="Room URL")
    parser.add_argument("-t", type=str, required=True, help="Token")
    config = parser.parse_args()

    asyncio.run(main(config.u, config.t))
|
||||
215
examples/deployment/flyio-example/bot_runner.py
Normal file
215
examples/deployment/flyio-example/bot_runner.py
Normal file
@@ -0,0 +1,215 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import subprocess
|
||||
import os
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import (
|
||||
DailyRESTHelper, DailyRoomObject, DailyRoomProperties, DailyRoomParams)
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# ------------ Configuration ------------ #
|
||||
|
||||
MAX_SESSION_TIME = 5 * 60 # 5 minutes
|
||||
REQUIRED_ENV_VARS = [
|
||||
'DAILY_API_KEY',
|
||||
'OPENAI_API_KEY',
|
||||
'ELEVENLABS_API_KEY',
|
||||
'ELEVENLABS_VOICE_ID',
|
||||
'FLY_API_KEY',
|
||||
'FLY_APP_NAME',]
|
||||
|
||||
FLY_API_HOST = os.getenv("FLY_API_HOST", "https://api.machines.dev/v1")
|
||||
FLY_APP_NAME = os.getenv("FLY_APP_NAME", "pipecat-fly-example")
|
||||
FLY_API_KEY = os.getenv("FLY_API_KEY", "")
|
||||
FLY_HEADERS = {
|
||||
'Authorization': f"Bearer {FLY_API_KEY}",
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
|
||||
daily_helpers = {}
|
||||
|
||||
|
||||
# ----------------- API ----------------- #
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage the HTTP session shared by the Daily REST helper.

    On startup an ``aiohttp.ClientSession`` is opened and a
    ``DailyRESTHelper`` built on it is stored in ``daily_helpers["rest"]``.
    The session is closed when the application shuts down.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """
    aiohttp_session = aiohttp.ClientSession()
    try:
        daily_helpers["rest"] = DailyRESTHelper(
            daily_api_key=os.getenv("DAILY_API_KEY", ""),
            daily_api_url=os.getenv("DAILY_API_URL", 'https://api.daily.co/v1'),
            aiohttp_session=aiohttp_session
        )
        yield
    finally:
        # Close the session even when startup or the application raised, so
        # the session is not left open.
        await aiohttp_session.close()
|
||||
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"]
|
||||
)
|
||||
|
||||
# ----------------- Main ----------------- #
|
||||
|
||||
|
||||
async def spawn_fly_machine(room_url: str, token: str):
    """Start a new Fly.io machine that runs ``bot.py`` for one session.

    The image of the first machine listed for ``FLY_APP_NAME`` is reused, a
    new machine is created with ``python3 bot.py -u <room_url> -t <token>``
    as its init command, and the call returns once the wait-for-``started``
    request succeeds.

    Args:
        room_url: URL of the Daily room the bot should join.
        token: Daily meeting token handed to the bot.

    Raises:
        Exception: If a Fly API request returns a non-200 status, or if the
            app has no existing machine to copy the image from.
    """
    machines_url = f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines"

    async with aiohttp.ClientSession() as session:
        # Use the same image as the bot runner
        async with session.get(machines_url, headers=FLY_HEADERS) as r:
            if r.status != 200:
                text = await r.text()
                raise Exception(f"Unable to get machine info from Fly: {text}")

            data = await r.json()

        # An empty listing would otherwise surface as an opaque IndexError.
        if not data:
            raise Exception(
                f"Unable to get machine info from Fly: no machines found for app {FLY_APP_NAME}")
        image = data[0]['config']['image']

        # Machine configuration
        worker_props = {
            "config": {
                "image": image,
                "auto_destroy": True,
                "init": {
                    # Built as an argv list so the values are never re-split
                    # on whitespace.
                    "cmd": ["python3", "bot.py", "-u", room_url, "-t", token]
                },
                "restart": {
                    "policy": "no"
                },
                "guest": {
                    "cpu_kind": "shared",
                    "cpus": 1,
                    "memory_mb": 1024
                }
            },
        }

        # Spawn a new machine instance
        async with session.post(machines_url, headers=FLY_HEADERS, json=worker_props) as r:
            if r.status != 200:
                text = await r.text()
                raise Exception(f"Problem starting a bot worker: {text}")

            data = await r.json()
            vm_id = data['id']

        # Wait for the machine to enter the started state
        async with session.get(f"{machines_url}/{vm_id}/wait?state=started", headers=FLY_HEADERS) as r:
            if r.status != 200:
                text = await r.text()
                raise Exception(f"Bot was unable to enter started state: {text}")

    print(f"Machine joined room: {room_url}")
|
||||
|
||||
|
||||
@app.post("/start_bot")
async def start_bot(request: Request) -> JSONResponse:
    """Provision a Daily room, launch a bot for it and return join details.

    The room named by ``DAILY_SAMPLE_ROOM_URL`` is used when that variable is
    set; otherwise a new room is created. The bot is started as a local
    subprocess when ``RUN_AS_PROCESS`` is enabled, and on a new Fly.io
    machine otherwise.

    Args:
        request: Incoming request. The body is optional.

    Returns:
        ``{"test": True}`` if the JSON body contains ``"test"``; otherwise
        the room URL and a user token under ``room_url`` and ``token``.

    Raises:
        HTTPException: Status 500 if the room cannot be created or found, no
            bot token is returned, or the bot cannot be started.
    """
    try:
        data = await request.json()
        # Is this a webhook creation request?
        if "test" in data:
            return JSONResponse({"test": True})
    except Exception:
        # The body is optional: a missing or non-JSON payload simply means
        # this is a regular start request.
        pass

    # Use specified room URL, or create a new one if not specified
    room_url = os.getenv("DAILY_SAMPLE_ROOM_URL", "")

    if not room_url:
        params = DailyRoomParams(
            properties=DailyRoomProperties()
        )
        try:
            room: DailyRoomObject = await daily_helpers["rest"].create_room(params=params)
        except Exception as e:
            raise HTTPException(
                status_code=500,
                detail=f"Unable to provision room {e}") from e
    else:
        # Check that the configured room URL exists
        try:
            room: DailyRoomObject = await daily_helpers["rest"].get_room_from_url(room_url)
        except Exception as e:
            raise HTTPException(
                status_code=500, detail=f"Room not found: {room_url}") from e

    # Give the agent a token to join the session
    token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)

    if not token:
        # Report room.url: room_url is empty when the room was just created.
        raise HTTPException(
            status_code=500, detail=f"Failed to get token for room: {room.url}")

    # Launch a new fly.io machine, or run as a shell process (not recommended).
    # Parse the flag explicitly so that values such as "false" or "0" disable
    # it instead of counting as enabled just because they are non-empty.
    run_as_process = os.getenv("RUN_AS_PROCESS", "").strip().lower() not in (
        "", "0", "false", "no", "off")

    if run_as_process:
        try:
            # Argument list without a shell: the URL and token are passed
            # verbatim and never interpreted by a shell.
            subprocess.Popen(
                ["python3", "-m", "bot", "-u", room.url, "-t", token],
                bufsize=1,
                cwd=os.path.dirname(os.path.abspath(__file__)))
        except Exception as e:
            raise HTTPException(
                status_code=500, detail=f"Failed to start subprocess: {e}") from e
    else:
        try:
            await spawn_fly_machine(room.url, token)
        except Exception as e:
            raise HTTPException(
                status_code=500, detail=f"Failed to spawn VM: {e}") from e

    # Grab a token for the user to join with
    user_token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)

    return JSONResponse({
        "room_url": room.url,
        "token": user_token,
    })
|
||||
|
||||
if __name__ == "__main__":
    # Refuse to start if a required variable is absent from the environment;
    # the first missing name, in list order, is reported.
    missing = next((name for name in REQUIRED_ENV_VARS if name not in os.environ), None)
    if missing is not None:
        raise Exception(f"Missing environment variable: {missing}.")

    # Command-line options; HOST and PORT from the environment provide the
    # defaults for --host and --port.
    parser = argparse.ArgumentParser(description="Pipecat Bot Runner")
    parser.add_argument(
        "--host",
        type=str,
        default=os.getenv("HOST", "0.0.0.0"),
        help="Host address")
    parser.add_argument(
        "--port",
        type=int,
        default=os.getenv("PORT", 7860),
        help="Port number")
    parser.add_argument(
        "--reload",
        action="store_true",
        default=False,
        help="Reload code on change")
    config = parser.parse_args()

    try:
        import uvicorn

        uvicorn.run("bot_runner:app", host=config.host, port=config.port, reload=config.reload)
    except KeyboardInterrupt:
        print("Pipecat runner shutting down...")
|
||||
8
examples/deployment/flyio-example/env.example
Normal file
8
examples/deployment/flyio-example/env.example
Normal file
@@ -0,0 +1,8 @@
|
||||
DAILY_API_KEY=
|
||||
DAILY_SAMPLE_ROOM_URL= # Enter a Daily room URL to use a set room URL each time (useful for local testing)
|
||||
OPENAI_API_KEY=
|
||||
ELEVENLABS_API_KEY=
|
||||
ELEVENLABS_VOICE_ID=
|
||||
FLY_API_KEY=
|
||||
FLY_APP_NAME=
|
||||
RUN_AS_PROCESS= # Spawn fly.io machine for each session or run as local process
|
||||
25
examples/deployment/flyio-example/example-fly.toml
Normal file
25
examples/deployment/flyio-example/example-fly.toml
Normal file
@@ -0,0 +1,25 @@
|
||||
# fly.toml app configuration file generated for pipecat-fly-example on 2024-07-01T15:04:53+01:00
|
||||
#
|
||||
# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
|
||||
#
|
||||
|
||||
app = 'pipecat-fly-example'
|
||||
primary_region = 'sjc'
|
||||
|
||||
[build]
|
||||
|
||||
[env]
|
||||
FLY_APP_NAME = 'pipecat-fly-example'
|
||||
|
||||
[http_service]
|
||||
internal_port = 7860
|
||||
force_https = true
|
||||
auto_stop_machines = true
|
||||
auto_start_machines = true
|
||||
min_machines_running = 0
|
||||
processes = ['app']
|
||||
|
||||
[[vm]]
|
||||
memory = 512
|
||||
cpu_kind = 'shared'
|
||||
cpus = 1
|
||||
5
examples/deployment/flyio-example/requirements.txt
Normal file
5
examples/deployment/flyio-example/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
pipecat-ai[daily,openai,silero]
|
||||
fastapi
|
||||
uvicorn
|
||||
python-dotenv
|
||||
loguru
|
||||
3
examples/dialin-chatbot/.dockerignore
Normal file
3
examples/dialin-chatbot/.dockerignore
Normal file
@@ -0,0 +1,3 @@
|
||||
**/.DS_Store
|
||||
.env
|
||||
.env.*
|
||||
165
examples/dialin-chatbot/.gitignore
vendored
Normal file
165
examples/dialin-chatbot/.gitignore
vendored
Normal file
@@ -0,0 +1,165 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/#use-with-ide
|
||||
.pdm.toml
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
runpod.toml
|
||||
|
||||
# custom script to recursively upgrade items in requirements.py
|
||||
upgrade_requirements.py
|
||||
.DS_Store
|
||||
40
examples/dialin-chatbot/Dockerfile
Normal file
40
examples/dialin-chatbot/Dockerfile
Normal file
@@ -0,0 +1,40 @@
|
||||
FROM python:3.11-bullseye
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
ARG USE_PERSISTENT_DATA
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
# Expose FastAPI port
|
||||
ENV FAST_API_PORT=7860
|
||||
EXPOSE 7860
|
||||
|
||||
# Install system dependencies
|
||||
RUN apt-get update && apt-get install --no-install-recommends -y \
|
||||
build-essential \
|
||||
git \
|
||||
ffmpeg \
|
||||
google-perftools \
|
||||
ca-certificates curl gnupg \
|
||||
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Set up a new user named "user" with user ID 1000
|
||||
RUN useradd -m -u 1000 user
|
||||
|
||||
# Set home to the user's home directory
|
||||
ENV HOME=/home/user \
|
||||
PATH=/home/user/.local/bin:$PATH \
|
||||
PYTHONPATH=$HOME/app \
|
||||
PYTHONUNBUFFERED=1
|
||||
|
||||
# Switch to the "user" user
|
||||
USER user
|
||||
|
||||
# Set the working directory to the user's home directory
|
||||
WORKDIR $HOME/app
|
||||
|
||||
# Install Python dependencies
|
||||
COPY *.py .
|
||||
COPY ./requirements.txt requirements.txt
|
||||
RUN pip3 install --no-cache-dir --upgrade -r requirements.txt
|
||||
|
||||
# Start the FastAPI server
|
||||
CMD python3 bot_runner.py --host "0.0.0.0" --port ${FAST_API_PORT}
|
||||
85
examples/dialin-chatbot/README.md
Normal file
85
examples/dialin-chatbot/README.md
Normal file
@@ -0,0 +1,85 @@
|
||||
<div align="center">
|
||||
<img alt="pipecat" width="300px" height="auto" src="image.png">
|
||||
</div>
|
||||
|
||||
# Dialin example
|
||||
|
||||
Example project that demonstrates how to add phone number dialin to your Pipecat bots. We include examples for both Daily (`bot_daily.py`) and Twilio (`bot_twilio.py`), depending on who you want to use as a phone vendor.
|
||||
|
||||
- 🔁 Transport: Daily WebRTC
|
||||
- 💬 Speech-to-Text: Deepgram via Daily transport
|
||||
- 🤖 LLM: GPT-4o / OpenAI
|
||||
- 🔉 Text-to-Speech: ElevenLabs
|
||||
|
||||
#### Should I use Daily or Twilio as a vendor?
|
||||
|
||||
If you're starting from scratch, using Daily to provision phone numbers alongside Daily as a transport offers some convenience (such as automatic call forwarding).
|
||||
|
||||
If you already have Twilio numbers and workflows that you want to connect to your Pipecat bots, there is some additional configuration required (you'll need to create an `on_dialin_ready` event handler and use the Twilio client to trigger the forward).
|
||||
|
||||
You can read more about this, as well as see respective walkthroughs in our docs.
|
||||
|
||||
## Setup
|
||||
|
||||
```shell
|
||||
# Install the requirements
|
||||
pip install -r requirements.txt
|
||||
|
||||
# Setup your env
|
||||
mv env.example .env
|
||||
```
|
||||
|
||||
## Using Daily numbers
|
||||
|
||||
Run `bot_runner.py` to handle incoming HTTP requests:
|
||||
|
||||
`python bot_runner.py --host localhost`
|
||||
|
||||
Then target the following URL:
|
||||
|
||||
`POST /daily_start_bot`
|
||||
|
||||
For more configuration options, please consult Daily's API documentation.
|
||||
|
||||
|
||||
## Using Twilio numbers
|
||||
|
||||
As above, but target the following URL:
|
||||
|
||||
`POST /twilio_start_bot`
|
||||
|
||||
For more configuration options, please consult Twilio's API documentation.
|
||||
|
||||
## Deployment example
|
||||
|
||||
A Dockerfile is included in this demo for convenience. Here is an example of how to build and deploy your bot to [fly.io](https://fly.io).
|
||||
|
||||
*Please note: This demo spawns agents as subprocesses for convenience / demonstration purposes. You would likely not want to do this in production as it would limit concurrency to available system resources. For more information on how to deploy your bots using VMs, refer to the Pipecat documentation.*
|
||||
|
||||
### Build the docker image
|
||||
|
||||
`docker build -t tag:project .`
|
||||
|
||||
### Launch the fly project
|
||||
|
||||
`mv fly.example.toml fly.toml`
|
||||
|
||||
`fly launch` (using the included fly.toml)
|
||||
|
||||
### Setup your secrets on Fly
|
||||
|
||||
Set the necessary secrets (found in `env.example`)
|
||||
|
||||
`fly secrets set DAILY_API_KEY=... OPENAI_API_KEY=... ELEVENLABS_API_KEY=... ELEVENLABS_VOICE_ID=...`
|
||||
|
||||
If you're using Twilio as a number vendor:
|
||||
|
||||
`fly secrets set TWILIO_ACCOUNT_SID=... TWILIO_AUTH_TOKEN=...`
|
||||
|
||||
### Deploy!
|
||||
|
||||
`fly deploy`
|
||||
|
||||
## Need to do something more advanced?
|
||||
|
||||
This demo covers the basics of bot telephony. If you want to know more about working with PSTN / SIP, please ping us on [Discord](https://discord.gg/pipecat).
|
||||
111
examples/dialin-chatbot/bot_daily.py
Normal file
111
examples/dialin-chatbot/bot_daily.py
Normal file
@@ -0,0 +1,111 @@
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import LLMAssistantResponseAggregator, LLMUserResponseAggregator
|
||||
from pipecat.frames.frames import (
|
||||
LLMMessagesFrame,
|
||||
EndFrame
|
||||
)
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport, DailyDialinSettings
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
daily_api_key = os.getenv("DAILY_API_KEY", "")
|
||||
daily_api_url = os.getenv("DAILY_API_URL", "https://api.daily.co/v1")
|
||||
|
||||
|
||||
async def main(room_url: str, token: str, callId: str, callDomain: str):
    """Run the dial-in chatbot in a Daily room until the pipeline ends.

    The pipeline is: Daily input -> user message aggregator -> OpenAI LLM ->
    ElevenLabs TTS -> Daily output -> assistant message aggregator.

    Args:
        room_url: URL of the Daily room to join.
        token: Daily meeting token used to join the room.
        callId: Call ID passed to the Daily dial-in settings.
        callDomain: Call domain passed to the Daily dial-in settings.
    """
    async with aiohttp.ClientSession() as session:
        # dialin_settings are only needed if Daily's SIP URI is used.
        # If you are handling this via Twilio or Telnyx, set this to None
        # and handle call-forwarding when on_dialin_ready fires.
        dialin_settings = DailyDialinSettings(call_id=callId, call_domain=callDomain)

        transport_params = DailyParams(
            api_url=daily_api_url,
            api_key=daily_api_key,
            dialin_settings=dialin_settings,
            audio_in_enabled=True,
            audio_out_enabled=True,
            camera_out_enabled=False,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
            transcription_enabled=True,
        )
        transport = DailyTransport(room_url, token, "Chatbot", transport_params)

        tts = ElevenLabsTTSService(
            aiohttp_session=session,
            api_key=os.getenv("ELEVENLABS_API_KEY", ""),
            voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
        )

        llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

        # Conversation history; the same list is handed to both aggregators
        # and to the initial LLMMessagesFrame.
        system_message = {
            "role": "system",
            "content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by saying 'Oh, hello! Who dares dial me at this hour?!'.",
        }
        messages = [system_message]

        user_aggregator = LLMUserResponseAggregator(messages)
        assistant_aggregator = LLMAssistantResponseAggregator(messages)

        stages = [
            transport.input(),
            user_aggregator,
            llm,
            tts,
            transport.output(),
            assistant_aggregator,
        ]
        task = PipelineTask(Pipeline(stages), PipelineParams(allow_interruptions=True))

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            # Start transcribing the participant, then queue the message
            # history (currently just the system prompt) for the LLM.
            transport.capture_participant_transcription(participant["id"])
            await task.queue_frames([LLMMessagesFrame(messages)])

        @transport.event_handler("on_participant_left")
        async def on_participant_left(transport, participant, reason):
            # Any participant leaving ends the session.
            await task.queue_frame(EndFrame())

        await PipelineRunner().run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: read the room and call details, then run the bot.
    cli = argparse.ArgumentParser(description="Pipecat Simple ChatBot")
    for flag, description in (
        ("-u", "Room URL"),
        ("-t", "Token"),
        ("-i", "Call ID"),
        ("-d", "Call Domain"),
    ):
        cli.add_argument(flag, type=str, help=description)
    args = cli.parse_args()

    asyncio.run(main(args.u, args.t, args.i, args.d))
|
||||
238
examples/dialin-chatbot/bot_runner.py
Normal file
238
examples/dialin-chatbot/bot_runner.py
Normal file
@@ -0,0 +1,238 @@
|
||||
"""
|
||||
bot_runner.py
|
||||
|
||||
HTTP service that listens for incoming calls from either Daily or Twilio,
|
||||
provisioning a room and starting a Pipecat bot in response.
|
||||
|
||||
Refer to README for more information.
|
||||
"""
|
||||
|
||||
|
||||
import aiohttp
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse, PlainTextResponse
|
||||
from twilio.twiml.voice_response import VoiceResponse
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import (
|
||||
DailyRESTHelper,
|
||||
DailyRoomObject,
|
||||
DailyRoomProperties,
|
||||
DailyRoomSipParams,
|
||||
DailyRoomParams)
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# ------------ Configuration ------------ #
|
||||
|
||||
MAX_SESSION_TIME = 5 * 60 # 5 minutes
|
||||
REQUIRED_ENV_VARS = ['OPENAI_API_KEY', 'DAILY_API_KEY',
|
||||
'ELEVENLABS_API_KEY', 'ELEVENLABS_VOICE_ID']
|
||||
|
||||
daily_helpers = {}
|
||||
|
||||
# ----------------- API ----------------- #
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Create the shared Daily REST helper for the lifetime of the app.

    An aiohttp session is opened on startup and stored, wrapped in a
    DailyRESTHelper, under ``daily_helpers["rest"]``. The session is closed
    when the application shuts down.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """
    # `async with` guarantees the session is closed even when an exception
    # is thrown into the generator at the `yield` point.
    async with aiohttp.ClientSession() as aiohttp_session:
        daily_helpers["rest"] = DailyRESTHelper(
            daily_api_key=os.getenv("DAILY_API_KEY", ""),
            daily_api_url=os.getenv("DAILY_API_URL", 'https://api.daily.co/v1'),
            aiohttp_session=aiohttp_session
        )
        yield
|
||||
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"]
|
||||
)
|
||||
|
||||
"""
|
||||
Create Daily room, tell the bot if the room is created for Twilio's SIP or Daily's SIP (vendor).
|
||||
When the vendor is Daily, the bot handles the call forwarding automatically,
|
||||
i.e, forwards the call from the "hold music state" to the Daily Room's SIP URI.
|
||||
|
||||
Alternatively, when the vendor is Twilio (not Daily), the bot is responsible for
|
||||
updating the state on Twilio. So when `dialin-ready` fires, it takes appropriate
|
||||
action using the Twilio Client library.
|
||||
"""
|
||||
|
||||
|
||||
async def _create_daily_room(room_url, callId, callDomain=None, vendor="daily"):
    """Get or create a Daily room and spawn a bot process that joins it.

    Args:
        room_url: URL of an existing room to reuse. When falsy, a new room
            with SIP dial-in enabled is created.
        callId: Identifier of the incoming call, passed to the bot as ``-i``.
        callDomain: Call domain passed to the Daily bot as ``-d``; not used
            for other vendors.
        vendor: ``"daily"`` starts ``bot_daily``; any other value starts
            ``bot_twilio`` and hands it the room's SIP endpoint instead.

    Returns:
        The room object returned by the Daily REST helper.

    Raises:
        HTTPException: With status 500 when the room cannot be found, the
            room or token is missing, or the bot process fails to start.
    """
    rest = daily_helpers["rest"]
    room: DailyRoomObject

    if not room_url:
        params = DailyRoomParams(
            properties=DailyRoomProperties(
                # Note: these are the default values, except for the display name
                sip=DailyRoomSipParams(
                    display_name="dialin-user",
                    video=False,
                    sip_mode="dial-in",
                    num_endpoints=1
                )
            )
        )

        print("Creating new room...")
        room = await rest.create_room(params=params)
    else:
        # Check that the passed room URL exists (we assume that it already
        # has SIP set up!)
        try:
            print(f"Joining existing room: {room_url}")
            room = await rest.get_room_from_url(room_url)
        except Exception as e:
            raise HTTPException(
                status_code=500, detail=f"Room not found: {room_url}") from e

    # Validate the room before dereferencing any of its attributes
    if not room:
        raise HTTPException(
            status_code=500, detail="Failed to get room or token")

    print(f"Daily room: {room.url} {room.config.sip_endpoint}")

    # Give the agent a token to join the session
    token = await rest.get_token(room.url, MAX_SESSION_TIME)

    if not token:
        raise HTTPException(
            status_code=500, detail="Failed to get room or token")

    # Spawn a new agent, and join the user session
    # Note: this is mostly for demonstration purposes (refer to 'deployment' in docs)
    #
    # The command is an argument list executed without a shell: the call id
    # comes from an external webhook and must never be interpreted by a shell.
    if vendor == "daily":
        bot_cmd = [
            "python3", "-m", "bot_daily",
            "-u", str(room.url),
            "-t", str(token),
            "-i", str(callId),
            "-d", str(callDomain),
        ]
    else:
        bot_cmd = [
            "python3", "-m", "bot_twilio",
            "-u", str(room.url),
            "-t", str(token),
            "-i", str(callId),
            "-s", str(room.config.sip_endpoint),
        ]

    try:
        subprocess.Popen(
            bot_cmd,
            bufsize=1,
            cwd=os.path.dirname(os.path.abspath(__file__))
        )
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Failed to start subprocess: {e}") from e

    return room
|
||||
|
||||
|
||||
@app.post("/twilio_start_bot", response_class=PlainTextResponse)
async def twilio_start_bot(request: Request):
    """Webhook invoked by Twilio when a call comes in.

    Creates (or reuses) a Daily room, starts the Twilio bot for it and
    answers with TwiML that plays hold music. The bot forwards the call to
    the room's SIP URI once its 'on_dialin_ready' event fires.

    To wire this up on Twilio: under Active Numbers pick the phone number,
    click Configure and, under Voice Configuration, set "a call comes in"
    to a webhook pointing at the URL where this code is hosted.

    Args:
        request: Incoming request; Twilio sends form data, not JSON.

    Returns:
        The TwiML response as a string.

    Raises:
        HTTPException: With status 500 when the request has no 'CallSid'.
    """
    print("POST /twilio_start_bot")

    try:
        form_data = await request.form()
        data = dict(form_data)
    except Exception as e:
        # Best effort: report the problem and let the CallSid check below
        # reject the request.
        print(f"Failed to parse form data: {e}")
        data = {}

    room_url = os.getenv("DAILY_SAMPLE_ROOM_URL", None)
    callId = data.get('CallSid')

    if not callId:
        raise HTTPException(
            status_code=500, detail="Missing 'CallSid' in request")

    print("CallId: %s" % callId)

    # create room and tell the bot to join the created room
    # note: Twilio does not require a callDomain
    await _create_daily_room(room_url, callId, None, "twilio")

    print("Put Twilio on hold...")
    # We have the room and the SIP URI,
    # but we do not know if the Daily SIP Worker and the Bot have joined the call
    # put the call on hold until the 'on_dialin_ready' fires.
    # Then, the bot will update the called sid with the sip uri.
    # http://com.twilio.music.classical.s3.amazonaws.com/BusyStrings.mp3
    resp = VoiceResponse()
    resp.play(
        url="http://com.twilio.sounds.music.s3.amazonaws.com/MARKOVICHAMP-Borghestral.mp3", loop=10)
    return str(resp)
|
||||
|
||||
|
||||
@app.post("/daily_start_bot")
async def daily_start_bot(request: Request) -> JSONResponse:
    """Webhook invoked when a call is received on Daily's SIP URI.

    Creates (or reuses) a Daily room and starts the Daily bot for it. The
    call stays on hold until the bot and SIP worker are ready; Daily
    forwards the call to the SIP URI when dialin_ready fires.

    Args:
        request: Incoming request whose JSON body carries 'callId' and
            'callDomain', or a 'test' key for webhook verification.

    Returns:
        ``{"test": True}`` for webhook checks, otherwise the room URL and
        the room's SIP URI.

    Raises:
        HTTPException: With status 500 when the body cannot be parsed or
            'callId' / 'callDomain' is missing.
    """
    # Use specified room URL, or create a new one if not specified
    room_url = os.getenv("DAILY_SAMPLE_ROOM_URL", None)

    # Get the dial-in properties from the request
    try:
        data = await request.json()
        if "test" in data:
            # Pass through any webhook checks
            return JSONResponse({"test": True})
        callId = data.get("callId", None)
        callDomain = data.get("callDomain", None)
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail="Missing properties 'callId' or 'callDomain'") from e

    # .get() never raises for absent keys, so check explicitly rather than
    # starting a bot with "None" as its call id or domain.
    if not callId or not callDomain:
        raise HTTPException(
            status_code=500,
            detail="Missing properties 'callId' or 'callDomain'")

    print(f"CallId: {callId}, CallDomain: {callDomain}")
    room: DailyRoomObject = await _create_daily_room(room_url, callId, callDomain, "daily")

    # Report where the call should be sent
    return JSONResponse({
        "room_url": room.url,
        "sipUri": room.config.sip_endpoint
    })
|
||||
|
||||
# ----------------- Main ----------------- #
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Refuse to start unless every required environment variable is set;
    # the first missing one (in declaration order) is reported.
    missing = next(
        (name for name in REQUIRED_ENV_VARS if name not in os.environ), None)
    if missing is not None:
        raise Exception(f"Missing environment variable: {missing}.")

    cli = argparse.ArgumentParser(description="Pipecat Bot Runner")
    cli.add_argument(
        "--host",
        type=str,
        default=os.getenv("HOST", "0.0.0.0"),
        help="Host address",
    )
    cli.add_argument(
        "--port",
        type=int,
        default=os.getenv("PORT", 7860),
        help="Port number",
    )
    # NOTE: store_true combined with default=True means reload is always on.
    cli.add_argument(
        "--reload",
        action="store_true",
        default=True,
        help="Reload code on change",
    )
    args = cli.parse_args()

    try:
        import uvicorn

        uvicorn.run(
            "bot_runner:app",
            host=args.host,
            port=args.port,
            reload=args.reload,
        )
    except KeyboardInterrupt:
        print("Pipecat runner shutting down...")
|
||||
125
examples/dialin-chatbot/bot_twilio.py
Normal file
125
examples/dialin-chatbot/bot_twilio.py
Normal file
@@ -0,0 +1,125 @@
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import LLMAssistantResponseAggregator, LLMUserResponseAggregator
|
||||
from pipecat.frames.frames import (
|
||||
LLMMessagesFrame,
|
||||
EndFrame
|
||||
)
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from twilio.rest import Client
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
twilio_account_sid = os.getenv('TWILIO_ACCOUNT_SID')
|
||||
twilio_auth_token = os.getenv('TWILIO_AUTH_TOKEN')
|
||||
twilioclient = Client(twilio_account_sid, twilio_auth_token)
|
||||
|
||||
daily_api_key = os.getenv("DAILY_API_KEY", "")
|
||||
|
||||
|
||||
async def main(room_url: str, token: str, callId: str, sipUri: str):
    """Run the Twilio dial-in chatbot until a participant leaves.

    Args:
        room_url: URL of the Daily room the bot joins.
        token: Token passed to the Daily transport for joining the room.
        callId: Twilio call id of the call to forward.
        sipUri: SIP URI the Twilio call is forwarded to once dial-in is ready.
    """
    async with aiohttp.ClientSession() as session:
        # Dial-in settings are only needed when Daily's own SIP URI is used.
        # Here the call arrives via Twilio, so none are passed and the call
        # is forwarded by hand when on_dialin_ready fires.
        transport = DailyTransport(
            room_url,
            token,
            "Chatbot",
            DailyParams(
                api_key=daily_api_key,
                dialin_settings=None,  # Not required for Twilio
                audio_in_enabled=True,
                audio_out_enabled=True,
                camera_out_enabled=False,
                vad_enabled=True,
                vad_analyzer=SileroVADAnalyzer(),
                transcription_enabled=True,
            )
        )

        tts = ElevenLabsTTSService(
            aiohttp_session=session,
            api_key=os.getenv("ELEVENLABS_API_KEY", ""),
            voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
        )

        llm = OpenAILLMService(
            api_key=os.getenv("OPENAI_API_KEY"),
            model="gpt-4o")

        # Conversation context shared by both aggregators
        messages = [
            {
                "role": "system",
                "content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by saying 'Hello! Who dares dial me at this hour?!'.",
            },
        ]

        tma_in = LLMUserResponseAggregator(messages)
        tma_out = LLMAssistantResponseAggregator(messages)

        pipeline = Pipeline([
            transport.input(),
            tma_in,
            llm,
            tts,
            transport.output(),
            tma_out,
        ])

        task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            # Start transcribing the caller, then kick off the conversation
            transport.capture_participant_transcription(participant["id"])
            await task.queue_frames([LLMMessagesFrame(messages)])

        @transport.event_handler("on_participant_left")
        async def on_participant_left(transport, participant, reason):
            # Queue an EndFrame so the pipeline finishes once someone leaves
            await task.queue_frame(EndFrame())

        @transport.event_handler("on_dialin_ready")
        async def on_dialin_ready(transport, cdata):
            # For Twilio, Telnyx, etc. you need to update the state of the
            # call and forward it to the SIP URI yourself.
            print(f"Forwarding call: {callId} {sipUri}")

            try:
                # The TwiML is updated using Twilio's client library
                twilioclient.calls(callId).update(
                    twiml=f'<Response><Dial><Sip>{sipUri}</Sip></Dial></Response>'
                )
            except Exception as e:
                # Keep the original failure attached as the cause
                raise Exception(f"Failed to forward call: {str(e)}") from e

        runner = PipelineRunner()
        await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: read the room and call details, then run the bot.
    cli = argparse.ArgumentParser(description="Pipecat Simple ChatBot")
    for flag, description in (
        ("-u", "Room URL"),
        ("-t", "Token"),
        ("-i", "Call ID"),
        ("-s", "SIP URI"),
    ):
        cli.add_argument(flag, type=str, help=description)
    args = cli.parse_args()

    asyncio.run(main(args.u, args.t, args.i, args.s))
|
||||
8
examples/dialin-chatbot/env.example
Normal file
8
examples/dialin-chatbot/env.example
Normal file
@@ -0,0 +1,8 @@
|
||||
DAILY_SAMPLE_ROOM_URL=https://yourdomain.daily.co/yourroom # (optional: for joining the bot to the same room repeatedly for local dev)
|
||||
DAILY_API_KEY=.
|
||||
DAILY_API_URL=api.daily.co/v1
|
||||
OPENAI_API_KEY=
|
||||
ELEVENLABS_API_KEY=
|
||||
ELEVENLABS_VOICE_ID=
|
||||
TWILIO_ACCOUNT_SID=
|
||||
TWILIO_AUTH_TOKEN=
|
||||
19
examples/dialin-chatbot/fly.example.toml
Normal file
19
examples/dialin-chatbot/fly.example.toml
Normal file
@@ -0,0 +1,19 @@
|
||||
# fly.toml app configuration file generated for pipecat-dialin-demo on 2024-06-03T15:57:57+02:00
|
||||
#
|
||||
# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
|
||||
#
|
||||
|
||||
app = 'pipecat-dialin-demo'
|
||||
primary_region = 'sjc'
|
||||
|
||||
[build]
|
||||
|
||||
[http_service]
|
||||
internal_port = 7860
|
||||
force_https = true
|
||||
auto_stop_machines = true
|
||||
auto_start_machines = true
|
||||
min_machines_running = 1
|
||||
|
||||
[[vm]]
|
||||
size = 'performance-1x'
|
||||
BIN
examples/dialin-chatbot/image.png
Normal file
BIN
examples/dialin-chatbot/image.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 19 KiB |
5
examples/dialin-chatbot/requirements.txt
Normal file
5
examples/dialin-chatbot/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
pipecat-ai[daily,openai,silero]
|
||||
fastapi
|
||||
uvicorn
|
||||
python-dotenv
|
||||
twilio
|
||||
@@ -9,11 +9,11 @@ import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import EndFrame, TextFrame
|
||||
from pipecat.frames.frames import TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
@@ -27,15 +27,16 @@ logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Say One Thing", DailyParams(audio_out_enabled=True))
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
runner = PipelineRunner()
|
||||
@@ -47,10 +48,9 @@ async def main(room_url):
|
||||
@transport.event_handler("on_participant_joined")
|
||||
async def on_new_participant_joined(transport, participant):
|
||||
participant_name = participant["info"]["userName"] or ''
|
||||
await task.queue_frames([TextFrame(f"Hello there, {participant_name}!"), EndFrame()])
|
||||
await task.queue_frame(TextFrame(f"Hello there, {participant_name}!"))
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -9,11 +9,11 @@ import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import EndFrame, TextFrame
|
||||
from pipecat.frames.frames import TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.audio import LocalAudioTransport
|
||||
|
||||
@@ -30,10 +30,9 @@ async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = LocalAudioTransport(TransportParams(audio_out_enabled=True))
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
pipeline = Pipeline([tts, transport.output()])
|
||||
@@ -42,7 +41,7 @@ async def main():
|
||||
|
||||
async def say_something():
|
||||
await asyncio.sleep(1)
|
||||
await task.queue_frames([TextFrame("Hello there!"), EndFrame()])
|
||||
await task.queue_frame(TextFrame("Hello there!"))
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
|
||||
@@ -9,11 +9,11 @@ import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import EndFrame, LLMMessagesFrame
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
@@ -28,18 +28,19 @@ logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
"Say One Thing From an LLM",
|
||||
DailyParams(audio_out_enabled=True))
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
@@ -58,11 +59,10 @@ async def main(room_url):
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await task.queue_frames([LLMMessagesFrame(messages), EndFrame()])
|
||||
await task.queue_frame(LLMMessagesFrame(messages))
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -27,8 +27,10 @@ logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
@@ -64,5 +66,4 @@ async def main(room_url):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -56,7 +56,7 @@ async def main():
|
||||
runner = PipelineRunner()
|
||||
|
||||
async def run_tk():
|
||||
while runner.is_active():
|
||||
while not task.has_finished():
|
||||
tk_root.update()
|
||||
tk_root.update_idletasks()
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
@@ -30,8 +30,10 @@ logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(room_url, None, "Static And Dynamic Speech")
|
||||
|
||||
meeting = TransportServiceOutput(transport, mic_enabled=True)
|
||||
@@ -82,5 +84,4 @@ async def main(room_url: str):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -13,7 +13,6 @@ from dataclasses import dataclass
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
AppFrame,
|
||||
EndFrame,
|
||||
Frame,
|
||||
ImageRawFrame,
|
||||
LLMFullResponseStartFrame,
|
||||
@@ -73,8 +72,10 @@ class MonthPrepender(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
@@ -150,8 +151,6 @@ async def main(room_url):
|
||||
frames.append(MonthFrame(month=month))
|
||||
frames.append(LLMMessagesFrame(messages))
|
||||
|
||||
frames.append(EndFrame())
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
@@ -162,5 +161,4 @@ async def main(room_url):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -137,14 +137,11 @@ async def main():
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
# We only specify 5 months as we create tasks all at once and we might
|
||||
# get rate limited otherwise.
|
||||
# We only specify a few months as we create tasks all at once and we
|
||||
# might get rate limited otherwise.
|
||||
months: list[str] = [
|
||||
"January",
|
||||
"February",
|
||||
# "March",
|
||||
# "April",
|
||||
# "May",
|
||||
]
|
||||
|
||||
# We create one task per month. This will be executed concurrently.
|
||||
|
||||
@@ -9,16 +9,16 @@ import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.frames.frames import Frame, LLMMessagesFrame, MetricsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.logger import FrameLogger
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -34,8 +34,18 @@ logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
class MetricsLogger(FrameProcessor):
    """Pass-through processor that prints every MetricsFrame it receives."""

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Print metrics frames and forward every frame unchanged.

        Args:
            frame: The frame travelling through the pipeline.
            direction: Direction the frame is travelling in; it is forwarded
                in the same direction.
        """
        # Let the base FrameProcessor handle the frame before inspecting it
        await super().process_frame(frame, direction)

        if isinstance(frame, MetricsFrame):
            print(
                f"!!! MetricsFrame: {frame}, ttfb: {frame.ttfb}, processing: {frame.processing}, tokens: {frame.tokens}, characters: {frame.characters}")
        await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -48,19 +58,17 @@ async def main(room_url: str, token):
|
||||
)
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
model="gpt-4o"
|
||||
)
|
||||
|
||||
fl = FrameLogger("!!! after LLM", "red")
|
||||
fltts = FrameLogger("@@@ out of tts", "green")
|
||||
flend = FrameLogger("### out of the end", "magenta")
|
||||
ml = MetricsLogger()
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -75,15 +83,18 @@ async def main(room_url: str, token):
|
||||
transport.input(),
|
||||
tma_in,
|
||||
llm,
|
||||
fl,
|
||||
tts,
|
||||
fltts,
|
||||
ml,
|
||||
transport.output(),
|
||||
tma_out,
|
||||
flend
|
||||
])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
task = PipelineTask(pipeline, PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
report_only_initial_ttfb=False,
|
||||
))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
@@ -99,5 +110,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -51,7 +51,7 @@ class ImageSyncAggregator(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if not isinstance(frame, SystemFrame):
|
||||
if not isinstance(frame, SystemFrame) and direction == FrameDirection.DOWNSTREAM:
|
||||
await self.push_frame(ImageRawFrame(image=self._speaking_image_bytes, size=(1024, 1024), format=self._speaking_image_format))
|
||||
await self.push_frame(frame)
|
||||
await self.push_frame(ImageRawFrame(image=self._waiting_image_bytes, size=(1024, 1024), format=self._waiting_image_format))
|
||||
@@ -59,19 +59,22 @@ class ImageSyncAggregator(FrameProcessor):
|
||||
await self.push_frame(frame)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=1024,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
)
|
||||
)
|
||||
|
||||
@@ -116,7 +119,7 @@ async def main(room_url: str, token):
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
participant_name = participant["info"]["userName"] or ''
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([TextFrame(f"Hi, this is {participant_name}.")])
|
||||
await task.queue_frames([TextFrame(f"Hi there {participant_name}!")])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
@@ -124,5 +127,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -15,7 +15,7 @@ from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -31,8 +31,10 @@ logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -45,10 +47,9 @@ async def main(room_url: str, token):
|
||||
)
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
@@ -74,7 +75,12 @@ async def main(room_url: str, token):
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
@@ -90,5 +96,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -15,7 +15,7 @@ from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -31,8 +31,10 @@ logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -45,10 +47,9 @@ async def main(room_url: str, token):
|
||||
)
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = AnthropicLLMService(
|
||||
@@ -91,5 +92,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -17,6 +17,7 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
from pipecat.processors.frameworks.langchain import LangchainProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -47,8 +48,10 @@ def get_session_history(session_id: str) -> BaseChatMessageHistory:
|
||||
return message_store[session_id]
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -61,10 +64,9 @@ async def main(room_url: str, token):
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
prompt = ChatPromptTemplate.from_messages(
|
||||
@@ -121,5 +123,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -15,7 +15,7 @@ from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
from pipecat.services.deepgram import DeepgramTTSService
|
||||
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -31,20 +31,24 @@ logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True
|
||||
)
|
||||
)
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = DeepgramTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("DEEPGRAM_API_KEY"),
|
||||
@@ -67,6 +71,7 @@ async def main(room_url: str, token):
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
@@ -90,5 +95,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
@@ -32,15 +32,17 @@ logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=44100,
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
@@ -49,8 +51,8 @@ async def main(room_url: str, token):
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_name="British Lady",
|
||||
output_format="pcm_44100"
|
||||
voice_id="a0e99841-438c-4a64-b679-ae501e7d6091", # Barbershop Man
|
||||
sample_rate=44100,
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
@@ -72,11 +74,11 @@ async def main(room_url: str, token):
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
tma_out, # Goes before the transport because cartesia has word-level timestamps!
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
@@ -92,5 +94,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
@@ -19,7 +19,6 @@ from pipecat.services.playht import PlayHTTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.processors.logger import FrameLogger
|
||||
|
||||
from runner import configure
|
||||
|
||||
@@ -32,8 +31,10 @@ logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -92,5 +93,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
103
examples/foundational/07f-interruptible-azure.py
Normal file
103
examples/foundational/07f-interruptible-azure.py
Normal file
@@ -0,0 +1,103 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
from pipecat.services.azure import AzureLLMService, AzureSTTService, AzureTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
    """Run the interruptible voice bot backed by Azure STT, LLM and TTS.

    Opens an aiohttp session, asks ``configure`` for the Daily room URL and
    token, assembles the transport and services into a pipeline, and awaits
    the pipeline runner.
    """
    async with aiohttp.ClientSession() as session:
        (room_url, token) = await configure(session)

        daily_params = DailyParams(
            audio_out_enabled=True,
            audio_out_sample_rate=16000,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
            vad_audio_passthrough=True,
        )
        transport = DailyTransport(room_url, token, "Respond bot", daily_params)

        speech_to_text = AzureSTTService(
            api_key=os.getenv("AZURE_SPEECH_API_KEY"),
            region=os.getenv("AZURE_SPEECH_REGION"),
        )

        text_to_speech = AzureTTSService(
            api_key=os.getenv("AZURE_SPEECH_API_KEY"),
            region=os.getenv("AZURE_SPEECH_REGION"),
        )

        language_model = AzureLLMService(
            api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
            endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
            model=os.getenv("AZURE_CHATGPT_MODEL"),
        )

        system_prompt = {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
        }
        # Both aggregators receive the same list object, as does the
        # kick-off frame queued in the join handler below.
        messages = [system_prompt]

        user_aggregator = LLMUserResponseAggregator(messages)
        assistant_aggregator = LLMAssistantResponseAggregator(messages)

        # Processors in the order frames travel through them.
        stages = [
            transport.input(),     # audio arriving from the room
            speech_to_text,        # speech -> text
            user_aggregator,       # collects user turns
            language_model,        # produces the reply
            text_to_speech,        # text -> speech
            transport.output(),    # audio sent back to the room
            assistant_aggregator,  # collects assistant turns
        ]
        pipeline = Pipeline(stages)

        pipeline_task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            participant_id = participant["id"]
            transport.capture_participant_transcription(participant_id)
            # Start the conversation by asking the model to introduce itself.
            intro_request = {"role": "system", "content": "Please introduce yourself to the user."}
            messages.append(intro_request)
            kickoff_frame = LLMMessagesFrame(messages)
            await pipeline_task.queue_frames([kickoff_frame])

        pipeline_runner = PipelineRunner()

        await pipeline_runner.run(pipeline_task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -4,8 +4,8 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
@@ -31,8 +31,10 @@ logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -90,5 +92,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
102
examples/foundational/07h-interruptible-openpipe.py
Normal file
102
examples/foundational/07h-interruptible-openpipe.py
Normal file
@@ -0,0 +1,102 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openpipe import OpenPipeLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
import time
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
    """Run the interruptible voice bot using OpenPipe for the LLM stage.

    Opens an aiohttp session, asks ``configure`` for the Daily room URL and
    token, builds a Cartesia TTS service and an OpenPipe LLM service tagged
    with a timestamp-based conversation id, and awaits the pipeline runner.
    """
    async with aiohttp.ClientSession() as session:
        (room_url, token) = await configure(session)

        daily_params = DailyParams(
            audio_out_enabled=True,
            transcription_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer()
        )
        transport = DailyTransport(room_url, token, "Respond bot", daily_params)

        text_to_speech = CartesiaTTSService(
            api_key=os.getenv("CARTESIA_API_KEY"),
            voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22",  # British Lady
        )

        # Whole-second timestamp used to build the conversation tag.
        started_at = int(time.time())
        request_tags = {"conversation_id": f"pipecat-{started_at}"}
        language_model = OpenPipeLLMService(
            api_key=os.getenv("OPENAI_API_KEY"),
            openpipe_api_key=os.getenv("OPENPIPE_API_KEY"),
            model="gpt-4o",
            tags=request_tags
        )

        system_prompt = {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
        }
        # Both aggregators receive the same list object, as does the
        # kick-off frame queued in the join handler below.
        messages = [system_prompt]
        user_aggregator = LLMUserResponseAggregator(messages)
        assistant_aggregator = LLMAssistantResponseAggregator(messages)

        # Processors in the order frames travel through them.
        stages = [
            transport.input(),     # input from the room
            user_aggregator,       # collects user turns
            language_model,        # produces the reply
            text_to_speech,        # text -> speech
            transport.output(),    # audio sent back to the room
            assistant_aggregator,  # collects assistant turns
        ]
        pipeline = Pipeline(stages)

        pipeline_task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            participant_id = participant["id"]
            transport.capture_participant_transcription(participant_id)
            # Start the conversation by asking the model to introduce itself.
            intro_request = {"role": "system", "content": "Please introduce yourself to the user."}
            messages.append(intro_request)
            kickoff_frame = LLMMessagesFrame(messages)
            await pipeline_task.queue_frames([kickoff_frame])

        pipeline_runner = PipelineRunner()

        await pipeline_runner.run(pipeline_task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
97
examples/foundational/07i-interruptible-xtts.py
Normal file
97
examples/foundational/07i-interruptible-xtts.py
Normal file
@@ -0,0 +1,97 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.xtts import XTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
    """Run the interruptible voice bot with an XTTS server for speech output.

    Opens an aiohttp session, asks ``configure`` for the Daily room URL and
    token, points the XTTS service at ``http://localhost:8000`` using that
    same session, and awaits the pipeline runner.
    """
    async with aiohttp.ClientSession() as session:
        (room_url, token) = await configure(session)

        daily_params = DailyParams(
            audio_out_enabled=True,
            transcription_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
        )
        transport = DailyTransport(room_url, token, "Respond bot", daily_params)

        text_to_speech = XTTSService(
            aiohttp_session=session,
            voice_id="Claribel Dervla",
            language="en",
            base_url="http://localhost:8000"
        )

        language_model = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

        system_prompt = {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
        }
        # Both aggregators receive the same list object, as does the
        # kick-off frame queued in the join handler below.
        messages = [system_prompt]

        user_aggregator = LLMUserResponseAggregator(messages)
        assistant_aggregator = LLMAssistantResponseAggregator(messages)

        # Processors in the order frames travel through them.
        stages = [
            transport.input(),     # input from the room
            user_aggregator,       # collects user turns
            language_model,        # produces the reply
            text_to_speech,        # text -> speech
            transport.output(),    # audio sent back to the room
            assistant_aggregator,  # collects assistant turns
        ]
        pipeline = Pipeline(stages)

        pipeline_task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            participant_id = participant["id"]
            transport.capture_participant_transcription(participant_id)
            # Start the conversation by asking the model to introduce itself.
            intro_request = {"role": "system", "content": "Please introduce yourself to the user."}
            messages.append(intro_request)
            kickoff_frame = LLMMessagesFrame(messages)
            await pipeline_task.queue_frames([kickoff_frame])

        pipeline_runner = PipelineRunner()

        await pipeline_runner.run(pipeline_task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
100
examples/foundational/07j-interruptible-gladia.py
Normal file
100
examples/foundational/07j-interruptible-gladia.py
Normal file
@@ -0,0 +1,100 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.gladia import GladiaSTTService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
    """Run the interruptible voice bot with Gladia STT and Cartesia TTS.

    Opens an aiohttp session, asks ``configure`` for the Daily room URL and
    token, assembles the transport, Gladia STT, OpenAI LLM and Cartesia TTS
    into a pipeline, and awaits the pipeline runner.
    """
    async with aiohttp.ClientSession() as session:
        (room_url, token) = await configure(session)

        daily_params = DailyParams(
            audio_out_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
            vad_audio_passthrough=True,
        )
        transport = DailyTransport(room_url, token, "Respond bot", daily_params)

        speech_to_text = GladiaSTTService(
            api_key=os.getenv("GLADIA_API_KEY"),
        )

        text_to_speech = CartesiaTTSService(
            api_key=os.getenv("CARTESIA_API_KEY"),
            voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22",  # British Lady
        )

        language_model = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

        system_prompt = {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
        }
        # Both aggregators receive the same list object, as does the
        # kick-off frame queued in the join handler below.
        messages = [system_prompt]

        user_aggregator = LLMUserResponseAggregator(messages)
        assistant_aggregator = LLMAssistantResponseAggregator(messages)

        # Processors in the order frames travel through them.
        stages = [
            transport.input(),     # audio arriving from the room
            speech_to_text,        # speech -> text
            user_aggregator,       # collects user turns
            language_model,        # produces the reply
            text_to_speech,        # text -> speech
            transport.output(),    # audio sent back to the room
            assistant_aggregator,  # collects assistant turns
        ]
        pipeline = Pipeline(stages)

        pipeline_task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            participant_id = participant["id"]
            transport.capture_participant_transcription(participant_id)
            # Start the conversation by asking the model to introduce itself.
            intro_request = {"role": "system", "content": "Please introduce yourself to the user."}
            messages.append(intro_request)
            kickoff_frame = LLMMessagesFrame(messages)
            await pipeline_task.queue_frames([kickoff_frame])

        pipeline_runner = PipelineRunner()

        await pipeline_runner.run(pipeline_task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -4,8 +4,8 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
@@ -15,12 +15,11 @@ from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
from pipecat.services.azure import AzureTTSService
|
||||
from pipecat.services.lmnt import LmntTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
@@ -32,24 +31,26 @@ logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=16000,
|
||||
audio_out_sample_rate=24000,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
)
|
||||
|
||||
tts = AzureTTSService(
|
||||
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
|
||||
region=os.getenv("AZURE_SPEECH_REGION"),
|
||||
tts = LmntTTSService(
|
||||
api_key=os.getenv("LMNT_API_KEY"),
|
||||
voice_id="morgan"
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
@@ -91,5 +92,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
@@ -22,8 +22,10 @@ logger = logging.getLogger("pipecat")
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
|
||||
async def main(room_url: str):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
@@ -144,5 +146,4 @@ async def main(room_url: str):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
@@ -23,32 +24,34 @@ logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url, token):
|
||||
transport = DailyTransport(
|
||||
room_url, token, "Test",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_is_live=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url, token, "Test",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_is_live=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_video(participant["id"])
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_video(participant["id"])
|
||||
|
||||
pipeline = Pipeline([transport.input(), transport.output()])
|
||||
pipeline = Pipeline([transport.input(), transport.output()])
|
||||
|
||||
runner = PipelineRunner()
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
await runner.run(task)
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
@@ -27,40 +28,44 @@ logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url, token):
|
||||
tk_root = tk.Tk()
|
||||
tk_root.title("Local Mirror")
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
daily_transport = DailyTransport(room_url, token, "Test", DailyParams(audio_in_enabled=True))
|
||||
tk_root = tk.Tk()
|
||||
tk_root.title("Local Mirror")
|
||||
|
||||
tk_transport = TkLocalTransport(
|
||||
tk_root,
|
||||
TransportParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_is_live=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720))
|
||||
daily_transport = DailyTransport(
|
||||
room_url, token, "Test", DailyParams(
|
||||
audio_in_enabled=True))
|
||||
|
||||
@daily_transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_video(participant["id"])
|
||||
tk_transport = TkLocalTransport(
|
||||
tk_root,
|
||||
TransportParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_is_live=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720))
|
||||
|
||||
pipeline = Pipeline([daily_transport.input(), tk_transport.output()])
|
||||
@daily_transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_video(participant["id"])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
pipeline = Pipeline([daily_transport.input(), tk_transport.output()])
|
||||
|
||||
async def run_tk():
|
||||
while not task.has_finished():
|
||||
tk_root.update()
|
||||
tk_root.update_idletasks()
|
||||
await asyncio.sleep(0.1)
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
runner = PipelineRunner()
|
||||
async def run_tk():
|
||||
while not task.has_finished():
|
||||
tk_root.update()
|
||||
tk_root.update_idletasks()
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
await asyncio.gather(runner.run(task), run_tk())
|
||||
runner = PipelineRunner()
|
||||
|
||||
await asyncio.gather(runner.run(task), run_tk())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -15,7 +15,7 @@ from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -31,9 +31,10 @@ logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -46,10 +47,9 @@ async def main(room_url: str, token):
|
||||
)
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
@@ -90,5 +90,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -83,8 +83,10 @@ class InboundSoundEffectWrapper(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -148,5 +150,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -16,7 +16,7 @@ from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.moondream import MoondreamService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -49,8 +49,10 @@ class UserImageRequester(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -63,12 +65,6 @@ async def main(room_url: str, token):
|
||||
)
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
)
|
||||
|
||||
user_response = UserResponseAggregator()
|
||||
|
||||
image_requester = UserImageRequester()
|
||||
@@ -78,10 +74,9 @@ async def main(room_url: str, token):
|
||||
# If you run into weird description, try with use_cpu=True
|
||||
moondream = MoondreamService()
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
@@ -108,5 +103,4 @@ async def main(room_url: str, token):
|
||||
await runner.run(task)
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -16,7 +16,7 @@ from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.google import GoogleLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -49,8 +49,10 @@ class UserImageRequester(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -74,10 +76,9 @@ async def main(room_url: str, token):
|
||||
model="gemini-1.5-flash-latest",
|
||||
api_key=os.getenv("GOOGLE_API_KEY"))
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
@@ -104,5 +105,4 @@ async def main(room_url: str, token):
|
||||
await runner.run(task)
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -16,7 +16,7 @@ from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -49,8 +49,10 @@ class UserImageRequester(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -74,10 +76,9 @@ async def main(room_url: str, token):
|
||||
model="gpt-4o"
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
@@ -104,5 +105,4 @@ async def main(room_url: str, token):
|
||||
await runner.run(task)
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -16,7 +16,7 @@ from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -49,8 +49,10 @@ class UserImageRequester(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -70,14 +72,13 @@ async def main(room_url: str, token):
|
||||
vision_aggregator = VisionImageFrameAggregator()
|
||||
|
||||
anthropic = AnthropicLLMService(
|
||||
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
||||
model="claude-3-sonnet-20240229"
|
||||
api_key=os.getenv("ANTHROPIC_API_KEY")
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
sample_rate=16000,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
@@ -104,5 +105,4 @@ async def main(room_url: str, token):
|
||||
await runner.run(task)
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
@@ -35,23 +36,25 @@ class TranscriptionLogger(FrameProcessor):
|
||||
print(f"Transcription: {frame.text}")
|
||||
|
||||
|
||||
async def main(room_url: str):
|
||||
transport = DailyTransport(room_url, None, "Transcription bot",
|
||||
DailyParams(audio_in_enabled=True))
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
stt = WhisperSTTService()
|
||||
transport = DailyTransport(room_url, None, "Transcription bot",
|
||||
DailyParams(audio_in_enabled=True))
|
||||
|
||||
tl = TranscriptionLogger()
|
||||
stt = WhisperSTTService()
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
tl = TranscriptionLogger()
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
runner = PipelineRunner()
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
await runner.run(task)
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url))
|
||||
asyncio.run(main())
|
||||
|
||||
61
examples/foundational/13b-deepgram-transcription.py
Normal file
61
examples/foundational/13b-deepgram-transcription.py
Normal file
@@ -0,0 +1,61 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import Frame, TranscriptionFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.deepgram import DeepgramSTTService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class TranscriptionLogger(FrameProcessor):
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, TranscriptionFrame):
|
||||
print(f"Transcription: {frame.text}")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(room_url, None, "Transcription bot",
|
||||
DailyParams(audio_in_enabled=True))
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tl = TranscriptionLogger()
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -13,12 +13,8 @@ from pipecat.frames.frames import TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantContextAggregator,
|
||||
LLMUserContextAggregator,
|
||||
)
|
||||
from pipecat.processors.logger import FrameLogger
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMContext, OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
@@ -36,16 +32,18 @@ logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def start_fetch_weather(llm):
|
||||
await llm.push_frame(TextFrame("Let me think."))
|
||||
async def start_fetch_weather(function_name, llm, context):
|
||||
await llm.push_frame(TextFrame("Let me check on that."))
|
||||
|
||||
|
||||
async def fetch_weather_from_api(llm, args):
|
||||
return {"conditions": "nice", "temperature": "75"}
|
||||
async def fetch_weather_from_api(function_name, tool_call_id, args, llm, context, result_callback):
|
||||
await result_callback({"conditions": "nice", "temperature": "75"})
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -58,17 +56,18 @@ async def main(room_url: str, token):
|
||||
)
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
# Register a function_name of None to get all functions
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(
|
||||
"get_current_weather",
|
||||
None,
|
||||
fetch_weather_from_api,
|
||||
start_callback=start_fetch_weather)
|
||||
|
||||
@@ -109,22 +108,22 @@ async def main(room_url: str, token):
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
tma_in = LLMUserContextAggregator(context)
|
||||
tma_out = LLMAssistantContextAggregator(context)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline([
|
||||
fl_in,
|
||||
transport.input(),
|
||||
tma_in,
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
fl_out,
|
||||
tts,
|
||||
transport.output(),
|
||||
tma_out
|
||||
context_aggregator.assistant(),
|
||||
])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@ transport.event_handler("on_first_participant_joined")
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
@@ -134,7 +133,5 @@ async def main(room_url: str, token):
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
@@ -14,10 +14,6 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantContextAggregator,
|
||||
LLMUserContextAggregator
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.filters.function_filter import FunctionFilter
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
@@ -40,10 +36,10 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
current_voice = "News Lady"
|
||||
|
||||
|
||||
async def switch_voice(llm, args):
|
||||
async def switch_voice(function_name, tool_call_id, args, llm, context, result_callback):
|
||||
global current_voice
|
||||
current_voice = args["voice"]
|
||||
return {"voice": f"You are now using your {current_voice} voice. Your responses should now be as if you were a {current_voice}."}
|
||||
await result_callback({"voice": f"You are now using your {current_voice} voice. Your responses should now be as if you were a {current_voice}."})
|
||||
|
||||
|
||||
async def news_lady_filter(frame) -> bool:
|
||||
@@ -58,15 +54,16 @@ async def barbershop_man_filter(frame) -> bool:
|
||||
return current_voice == "Barbershop Man"
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Pipecat",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=44100,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
@@ -75,20 +72,17 @@ async def main(room_url: str, token):
|
||||
|
||||
news_lady = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_name="Newslady",
|
||||
output_format="pcm_44100"
|
||||
voice_id="bf991597-6c13-47e4-8411-91ec2de5c466", # Newslady
|
||||
)
|
||||
|
||||
british_lady = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_name="British Lady",
|
||||
output_format="pcm_44100"
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
barbershop_man = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_name="Barbershop Man",
|
||||
output_format="pcm_44100"
|
||||
voice_id="a0e99841-438c-4a64-b679-ae501e7d6091", # Barbershop Man
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
@@ -121,12 +115,11 @@ async def main(room_url: str, token):
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
tma_in = LLMUserContextAggregator(context)
|
||||
tma_out = LLMAssistantContextAggregator(context)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
ParallelPipeline( # TTS (one of the following vocies)
|
||||
[FunctionFilter(news_lady_filter), news_lady], # News Lady voice
|
||||
@@ -134,7 +127,7 @@ async def main(room_url: str, token):
|
||||
[FunctionFilter(barbershop_man_filter), barbershop_man], # Barbershop Man voice
|
||||
),
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
])
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
@@ -155,5 +148,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -14,13 +14,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantContextAggregator,
|
||||
LLMUserContextAggregator
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.filters.function_filter import FunctionFilter
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.whisper import Model, WhisperSTTService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
@@ -41,10 +37,10 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
current_language = "English"
|
||||
|
||||
|
||||
async def switch_language(llm, args):
|
||||
async def switch_language(function_name, tool_call_id, args, llm, context, result_callback):
|
||||
global current_language
|
||||
current_language = args["language"]
|
||||
return {"voice": f"Your answers from now on should be in {current_language}."}
|
||||
await result_callback({"voice": f"Your answers from now on should be in {current_language}."})
|
||||
|
||||
|
||||
async def english_filter(frame) -> bool:
|
||||
@@ -55,8 +51,10 @@ async def spanish_filter(frame) -> bool:
|
||||
return current_language == "Spanish"
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -72,17 +70,14 @@ async def main(room_url: str, token):
|
||||
|
||||
stt = WhisperSTTService(model=Model.LARGE)
|
||||
|
||||
english_tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id="pNInz6obpgDQGcFmaJgB",
|
||||
english_tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
spanish_tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
model="eleven_multilingual_v2",
|
||||
voice_id="9F4C8ztpNUmXkdDDbz3J",
|
||||
spanish_tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="846d6cb0-2301-48b6-9683-48f5618ea2f6", # Spanish-speaking Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
@@ -115,20 +110,19 @@ async def main(room_url: str, token):
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
tma_in = LLMUserContextAggregator(context)
|
||||
tma_out = LLMAssistantContextAggregator(context)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
tma_in, # User responses
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
ParallelPipeline( # TTS (bot will speak the chosen language)
|
||||
[FunctionFilter(english_filter), english_tts], # English
|
||||
[FunctionFilter(spanish_filter), spanish_tts], # Spanish
|
||||
),
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
context_aggregator.assistant() # Assistant spoken responses
|
||||
])
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
@@ -149,5 +143,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
130
examples/foundational/16-gpu-container-local-bot.py
Normal file
130
examples/foundational/16-gpu-container-local-bot.py
Normal file
@@ -0,0 +1,130 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
from pipecat.services.deepgram import DeepgramTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport, DailyTransportMessageFrame
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
)
|
||||
|
||||
tts = DeepgramTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("DEEPGRAM_API_KEY"),
|
||||
voice="aura-asteria-en",
|
||||
base_url="http://0.0.0.0:8080/v1/speak"
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
# To use OpenAI
|
||||
# api_key=os.getenv("OPENAI_API_KEY"),
|
||||
# model="gpt-4o"
|
||||
# Or, to use a local vLLM (or similar) api server
|
||||
model="meta-llama/Meta-Llama-3-8B-Instruct",
|
||||
base_url="http://0.0.0.0:8000/v1"
|
||||
)
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))
|
||||
|
||||
# When a participant joins, start transcription for that participant so the
|
||||
# bot can "hear" and respond to them.
|
||||
@transport.event_handler("on_participant_joined")
|
||||
async def on_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
|
||||
# When the first participant joins, the bot should introduce itself.
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
# Handle "latency-ping" messages. The client will send app messages that look like
|
||||
# this:
|
||||
# { "latency-ping": { ts: <client-side timestamp> }}
|
||||
#
|
||||
# We want to send an immediate pong back to the client from this handler function.
|
||||
# Also, we will push a frame into the top of the pipeline and send it after the
|
||||
#
|
||||
@transport.event_handler("on_app_message")
|
||||
async def on_app_message(transport, message, sender):
|
||||
try:
|
||||
if "latency-ping" in message:
|
||||
logger.debug(f"Received latency ping app message: {message}")
|
||||
ts = message["latency-ping"]["ts"]
|
||||
# Send immediately
|
||||
transport.output().send_message(DailyTransportMessageFrame(
|
||||
message={"latency-pong-msg-handler": {"ts": ts}},
|
||||
participant_id=sender))
|
||||
# And push to the pipeline for the Daily transport.output to send
|
||||
await tma_in.push_frame(
|
||||
DailyTransportMessageFrame(
|
||||
message={"latency-pong-pipeline-delivery": {"ts": ts}},
|
||||
participant_id=sender))
|
||||
except Exception as e:
|
||||
logger.debug(f"message handling error: {e} - {message}")
|
||||
|
||||
runner = PipelineRunner()
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
107
examples/foundational/17-detect-user-idle.py
Normal file
107
examples/foundational/17-detect-user-idle.py
Normal file
@@ -0,0 +1,107 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
|
||||
from pipecat.processors.user_idle_processor import UserIdleProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
)
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(messages)
|
||||
|
||||
async def user_idle_callback(user_idle: UserIdleProcessor):
|
||||
messages.append(
|
||||
{"role": "system", "content": "Ask the user if they are still there and try to prompt for some input, but be short."})
|
||||
await user_idle.push_frame(LLMMessagesFrame(messages))
|
||||
|
||||
user_idle = UserIdleProcessor(callback=user_idle_callback, timeout=5.0)
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport user input
|
||||
user_idle, # Idle user check-in
|
||||
tma_in, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
tma_out # Assistant spoken responses
|
||||
])
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
78
examples/foundational/18-gstreamer-filesrc.py
Normal file
78
examples/foundational/18-gstreamer-filesrc.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.gstreamer.pipeline_source import GStreamerPipelineSource
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure_with_args
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
    """Publish a local media file into a Daily room through GStreamer.

    Extends the sample's command-line parser with a required ``-i/--input``
    option, resolves the room URL via ``configure_with_args``, and then runs
    a two-stage pipeline: a ``filesrc``-based GStreamer source feeding the
    Daily transport output.
    """
    frame_width, frame_height = 1280, 720

    async with aiohttp.ClientSession() as http_session:
        arg_parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
        arg_parser.add_argument(
            "-i",
            "--input",
            type=str,
            required=True,
            help="Input video file",
        )

        # The second element of the returned tuple is ignored here; the
        # transport below is created with ``None`` in the token position.
        room_url, _, cli_args = await configure_with_args(http_session, arg_parser)

        daily_params = DailyParams(
            audio_out_enabled=True,
            audio_out_is_live=True,
            camera_out_enabled=True,
            camera_out_width=frame_width,
            camera_out_height=frame_height,
            camera_out_is_live=True,
        )
        daily_transport = DailyTransport(room_url, None, "GStreamer", daily_params)

        source_params = GStreamerPipelineSource.OutputParams(
            video_width=frame_width,
            video_height=frame_height,
            audio_sample_rate=16000,
            audio_channels=1,
        )
        file_source = GStreamerPipelineSource(
            pipeline=f"filesrc location={cli_args.input}",
            out_params=source_params,
        )

        stages = [
            file_source,               # GStreamer file source
            daily_transport.output(),  # Transport bot output
        ]
        task = PipelineTask(Pipeline(stages))

        await PipelineRunner().run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
64
examples/foundational/18a-gstreamer-videotestsrc.py
Normal file
64
examples/foundational/18a-gstreamer-videotestsrc.py
Normal file
@@ -0,0 +1,64 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import sys
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.gstreamer.pipeline_source import GStreamerPipelineSource
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
    """Publish a GStreamer test pattern into a Daily room.

    Resolves the room URL via ``configure`` and runs a two-stage pipeline:
    a ``videotestsrc`` GStreamer source (1280x720 at 30/1) feeding the Daily
    transport's camera output.
    """
    frame_width, frame_height = 1280, 720

    async with aiohttp.ClientSession() as http_session:
        # The second element of the returned tuple is ignored here; the
        # transport below is created with ``None`` in the token position.
        room_url, _ = await configure(http_session)

        daily_params = DailyParams(
            camera_out_enabled=True,
            camera_out_width=frame_width,
            camera_out_height=frame_height,
            camera_out_is_live=True,
        )
        daily_transport = DailyTransport(room_url, None, "GStreamer", daily_params)

        source_params = GStreamerPipelineSource.OutputParams(
            video_width=frame_width,
            video_height=frame_height,
            clock_sync=False,
        )
        test_source = GStreamerPipelineSource(
            pipeline='videotestsrc ! capsfilter caps="video/x-raw,width=1280,height=720,framerate=30/1"',
            out_params=source_params,
        )

        stages = [
            test_source,               # GStreamer test-pattern source
            daily_transport.output(),  # Transport bot output
        ]
        task = PipelineTask(Pipeline(stages))

        await PipelineRunner().run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
116
examples/foundational/19a-tools-anthropic.py
Normal file
116
examples/foundational/19a-tools-anthropic.py
Normal file
@@ -0,0 +1,116 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def get_weather(function_name, tool_call_id, arguments, llm, context, result_callback):
    """Tool handler that reports a canned weather string for a location.

    Only ``arguments["location"]`` and ``result_callback`` are used; the
    remaining parameters are accepted but ignored. The report is a fixed
    sentence (72 degrees and sunny) with the location interpolated, passed
    to the awaited ``result_callback``.
    """
    place = arguments["location"]
    report = f"The weather in {place} is currently 72 degrees and sunny."
    await result_callback(report)
|
||||
|
||||
|
||||
async def main():
    """Run a voice bot in a Daily room that can call a weather tool via Anthropic.

    Builds the Daily transport, Cartesia TTS and Anthropic LLM services,
    registers ``get_weather`` as a callable function, and runs a pipeline of
    transport input -> user context -> LLM -> TTS -> transport output ->
    assistant context. The conversation is started when the first
    participant joins.
    """
    async with aiohttp.ClientSession() as http_session:
        room_url, token = await configure(http_session)

        daily_params = DailyParams(
            audio_out_enabled=True,
            transcription_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
        )
        daily_transport = DailyTransport(room_url, token, "Respond bot", daily_params)

        tts = CartesiaTTSService(
            api_key=os.getenv("CARTESIA_API_KEY"),
            voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22",  # British Lady
        )

        llm = AnthropicLLMService(
            api_key=os.getenv("ANTHROPIC_API_KEY"),
            model="claude-3-5-sonnet-20240620",
        )
        llm.register_function("get_weather", get_weather)

        # Tool description handed to the LLM context; uses an "input_schema" key.
        weather_tool = {
            "name": "get_weather",
            "description": "Get the current weather in a given location",
            "input_schema": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                },
                "required": ["location"],
            },
        }
        tools = [weather_tool]

        # todo: test with very short initial user message

        # Alternative opening kept for reference (system prompt plus intro request):
        # messages = [{"role": "system",
        #              "content": "You are a helpful assistant who can report the weather in any location in the universe. Respond concisely. Your response will be turned into speech so use only simple words and punctuation."},
        #             {"role": "user",
        #              "content": " Start the conversation by introducing yourself."}]

        messages = [{"role": "user", "content": "Say 'hello' to start the conversation."}]

        context = OpenAILLMContext(messages, tools)
        context_aggregator = llm.create_context_aggregator(context)

        stages = [
            daily_transport.input(),         # Transport user input
            context_aggregator.user(),       # User spoken responses
            llm,                             # LLM
            tts,                             # TTS
            daily_transport.output(),        # Transport bot output
            context_aggregator.assistant(),  # Assistant spoken responses and tool context
        ]
        task_params = PipelineParams(allow_interruptions=True, enable_metrics=True)
        task = PipelineTask(Pipeline(stages), task_params)

        @daily_transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            transport.capture_participant_transcription(participant["id"])
            # Kick off the conversation by queueing the current context.
            opening_frame = context_aggregator.user().get_context_frame()
            await task.queue_frames([opening_frame])

        await PipelineRunner().run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
172
examples/foundational/19b-tools-video-anthropic.py
Normal file
172
examples/foundational/19b-tools-video-anthropic.py
Normal file
@@ -0,0 +1,172 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
video_participant_id = None
|
||||
|
||||
|
||||
async def get_weather(function_name, tool_call_id, arguments, llm, context, result_callback):
    """Tool handler that reports a canned weather string for a location.

    Only ``arguments["location"]`` and ``result_callback`` are used; the
    remaining parameters are accepted but ignored. The report is a fixed
    sentence (72 degrees and sunny) with the location interpolated, passed
    to the awaited ``result_callback``.
    """
    place = arguments["location"]
    report = f"The weather in {place} is currently 72 degrees and sunny."
    await result_callback(report)
|
||||
|
||||
|
||||
async def get_image(function_name, tool_call_id, arguments, llm, context, result_callback):
    """Tool handler that asks the LLM service for an image frame.

    Reads ``arguments["question"]`` and forwards it as ``text_content`` to
    ``llm.request_image_frame``, targeting the module-level
    ``video_participant_id``. ``result_callback`` is not invoked here.
    """
    # NOTE(review): video_participant_id is still None until a participant
    # has joined; presumably the tool is never called before then - confirm.
    user_question = arguments["question"]
    await llm.request_image_frame(
        user_id=video_participant_id,
        text_content=user_question,
    )
|
||||
|
||||
|
||||
async def main():
    """Run a voice bot in a Daily room with weather and video-frame tools.

    Builds the Daily transport, Cartesia TTS and Anthropic LLM services,
    registers ``get_weather`` and ``get_image`` as callable functions, and
    runs a pipeline of transport input -> user context -> LLM -> TTS ->
    transport output -> assistant context. When the first participant joins,
    their id is stored in the module-level ``video_participant_id`` and the
    conversation is started.
    """
    # NOTE(review): ``llm`` is declared global, so the service assigned below
    # becomes a module-level name. The tool handlers visible in this file take
    # ``llm`` as a parameter, so this may be unnecessary - confirm before removing.
    global llm

    async with aiohttp.ClientSession() as http_session:
        room_url, token = await configure(http_session)

        daily_params = DailyParams(
            audio_out_enabled=True,
            transcription_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
        )
        daily_transport = DailyTransport(room_url, token, "Respond bot", daily_params)

        tts = CartesiaTTSService(
            api_key=os.getenv("CARTESIA_API_KEY"),
            voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22",  # British Lady
        )

        llm = AnthropicLLMService(
            api_key=os.getenv("ANTHROPIC_API_KEY"),
            model="claude-3-5-sonnet-20240620",
            enable_prompt_caching_beta=True,
        )
        llm.register_function("get_weather", get_weather)
        llm.register_function("get_image", get_image)

        # Tool descriptions handed to the LLM context; each uses an "input_schema" key.
        weather_tool = {
            "name": "get_weather",
            "description": "Get the current weather in a given location",
            "input_schema": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                },
                "required": ["location"],
            },
        }
        image_tool = {
            "name": "get_image",
            "description": "Get an image from the video stream.",
            "input_schema": {
                "type": "object",
                "properties": {
                    "question": {
                        "type": "string",
                        "description": "The question that the user is asking about the image.",
                    },
                },
                "required": ["question"],
            },
        }
        tools = [weather_tool, image_tool]

        # todo: test with very short initial user message

        system_prompt = """\
You are a helpful assistant who converses with a user and answers questions. Respond concisely to general questions.

Your response will be turned into speech so use only simple words and punctuation.

You have access to two tools: get_weather and get_image.

You can respond to questions about the weather using the get_weather tool.

You can answer questions about the user's video stream using the get_image tool. Some examples of phrases that \
indicate you should use the get_image tool are:
- What do you see?
- What's in the video?
- Can you describe the video?
- Tell me about what you see.
- Tell me something interesting about what you see.
- What's happening in the video?

If you need to use a tool, simply use the tool. Do not tell the user the tool you are using. Be brief and concise.
"""

        system_message = {
            "role": "system",
            "content": [
                {
                    "type": "text",
                    "text": system_prompt,
                },
            ],
        }
        opening_message = {
            "role": "user",
            "content": "Start the conversation by introducing yourself.",
        }
        messages = [system_message, opening_message]

        context = OpenAILLMContext(messages, tools)
        context_aggregator = llm.create_context_aggregator(context)

        stages = [
            daily_transport.input(),         # Transport user input
            context_aggregator.user(),       # User speech to text
            llm,                             # LLM
            tts,                             # TTS
            daily_transport.output(),        # Transport bot output
            context_aggregator.assistant(),  # Assistant spoken responses and tool context
        ]
        task_params = PipelineParams(allow_interruptions=True, enable_metrics=True)
        task = PipelineTask(Pipeline(stages), task_params)

        @daily_transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            global video_participant_id
            video_participant_id = participant["id"]
            transport.capture_participant_transcription(video_participant_id)
            # framerate=0: presumably frames are only delivered on request - confirm.
            transport.capture_participant_video(video_participant_id, framerate=0)
            # Kick off the conversation by queueing the current context.
            opening_frame = context_aggregator.user().get_context_frame()
            await task.queue_frames([opening_frame])

        await PipelineRunner().run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
138
examples/foundational/19c-tools-togetherai.py
Normal file
138
examples/foundational/19c-tools-togetherai.py
Normal file
@@ -0,0 +1,138 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.together import TogetherLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def get_current_weather(function_name, tool_call_id, arguments, llm, context, result_callback):
    """Tool handler that reports a canned weather string for a location.

    Logs a debug line on entry, then passes a fixed sentence (72 degrees and
    sunny) with ``arguments["location"]`` interpolated to the awaited
    ``result_callback``. The remaining parameters are accepted but ignored.
    """
    logger.debug("IN get_current_weather")
    place = arguments["location"]
    report = f"The weather in {place} is currently 72 degrees and sunny."
    await result_callback(report)
|
||||
|
||||
|
||||
async def main():
    """Run a voice bot in a Daily room that can call a weather tool via Together.

    Builds the Daily transport, Cartesia TTS and Together LLM services and
    registers ``get_current_weather`` as a callable function. The tool is
    described to the model inside the system prompt (as JSON plus a
    ``<function=...>`` reply format) rather than being passed to the context.
    The pipeline is transport input -> user context -> LLM -> TTS ->
    transport output -> assistant context, and the conversation is started
    when the first participant joins.
    """
    async with aiohttp.ClientSession() as http_session:
        room_url, token = await configure(http_session)

        daily_params = DailyParams(
            audio_out_enabled=True,
            transcription_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
        )
        daily_transport = DailyTransport(room_url, token, "Respond bot", daily_params)

        tts = CartesiaTTSService(
            api_key=os.getenv("CARTESIA_API_KEY"),
            voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22",  # British Lady
        )

        # Both the API key and the model name come from the environment.
        llm = TogetherLLMService(
            api_key=os.getenv("TOGETHER_API_KEY"),
            model=os.getenv("TOGETHER_MODEL"),
        )
        llm.register_function("get_current_weather", get_current_weather)

        weather_tool = {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                },
                "required": ["location"],
            },
        }

        system_prompt = f"""\
You have access to the following functions:

Use the function '{weather_tool["name"]}' to '{weather_tool["description"]}':
{json.dumps(weather_tool)}

If you choose to call a function ONLY reply in the following format with no prefix or suffix:

<function=example_function_name>{{\"example_name\": \"example_value\"}}</function>

Reminder:
- Function calls MUST follow the specified format, start with <function= and end with </function>
- Required parameters MUST be specified
- Only call one function at a time
- Put the entire function call reply on one line
- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls

"""

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": "Wait for the user to say something."},
        ]

        context = OpenAILLMContext(messages)
        context_aggregator = llm.create_context_aggregator(context)

        stages = [
            daily_transport.input(),         # Transport user input
            context_aggregator.user(),       # User speech to text
            llm,                             # LLM
            tts,                             # TTS
            daily_transport.output(),        # Transport bot output
            context_aggregator.assistant(),  # Assistant spoken responses and tool context
        ]
        task_params = PipelineParams(allow_interruptions=True, enable_metrics=True)
        task = PipelineTask(Pipeline(stages), task_params)

        @daily_transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            transport.capture_participant_transcription(participant["id"])
            # Kick off the conversation with the initial message list.
            opening_frame = LLMMessagesFrame(messages)
            await task.queue_frames([opening_frame])

        await PipelineRunner().run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -1,12 +1,26 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
|
||||
|
||||
|
||||
def configure():
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
async def configure(aiohttp_session: aiohttp.ClientSession):
|
||||
(url, token, _) = await configure_with_args(aiohttp_session)
|
||||
return (url, token)
|
||||
|
||||
|
||||
async def configure_with_args(
|
||||
aiohttp_session: aiohttp.ClientSession,
|
||||
parser: argparse.ArgumentParser | None = None):
|
||||
if not parser:
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
parser.add_argument(
|
||||
"-u",
|
||||
"--url",
|
||||
@@ -33,26 +47,15 @@ def configure():
|
||||
if not key:
|
||||
raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
|
||||
daily_rest_helper = DailyRESTHelper(
|
||||
daily_api_key=key,
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session)
|
||||
|
||||
# Create a meeting token for the given room with an expiration 1 hour in
|
||||
# the future.
|
||||
room_name: str = urllib.parse.urlparse(url).path[1:]
|
||||
expiration: float = time.time() + 60 * 60
|
||||
expiry_time: float = 60 * 60
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://api.daily.co/v1/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True,
|
||||
"exp": expiration}},
|
||||
)
|
||||
token = await daily_rest_helper.get_token(url, expiry_time)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
|
||||
return (url, token)
|
||||
return (url, token, args)
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
@@ -25,7 +31,7 @@ from pipecat.processors.aggregators.llm_response import LLMUserResponseAggregato
|
||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.moondream import MoondreamService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
@@ -128,8 +134,10 @@ class ImageFilterProcessor(FrameProcessor):
|
||||
await self.push_frame(frame)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -145,10 +153,9 @@ async def main(room_url: str, token):
|
||||
)
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id="pNInz6obpgDQGcFmaJgB",
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
@@ -204,5 +211,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
python-dotenv
|
||||
requests
|
||||
fastapi[all]
|
||||
uvicorn
|
||||
pipecat-ai[daily,moondream,openai,silero]
|
||||
|
||||
@@ -1,11 +1,17 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
|
||||
|
||||
|
||||
def configure():
|
||||
async def configure(aiohttp_session: aiohttp.ClientSession):
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
parser.add_argument(
|
||||
"-u",
|
||||
@@ -33,26 +39,16 @@ def configure():
|
||||
if not key:
|
||||
raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
|
||||
# Create a meeting token for the given room with an expiration 1 hour in
|
||||
# the future.
|
||||
room_name: str = urllib.parse.urlparse(url).path[1:]
|
||||
expiration: float = time.time() + 60 * 60
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://api.daily.co/v1/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True,
|
||||
"exp": expiration}},
|
||||
daily_rest_helper = DailyRESTHelper(
|
||||
daily_api_key=key,
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session
|
||||
)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
# Create a meeting token for the given room with an expiration 1 hour in
|
||||
# the future.
|
||||
expiry_time: float = 60 * 60
|
||||
|
||||
token: str = res.json()["token"]
|
||||
token = await daily_rest_helper.get_token(url, expiry_time)
|
||||
|
||||
return (url, token)
|
||||
|
||||
@@ -1,31 +1,51 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
import atexit
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse, RedirectResponse
|
||||
|
||||
from utils.daily_helpers import create_room as _create_room, get_token
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
|
||||
|
||||
MAX_BOTS_PER_ROOM = 1
|
||||
|
||||
# Bot sub-process dict for status reporting and concurrency control
|
||||
bot_procs = {}
|
||||
|
||||
daily_helpers = {}
|
||||
|
||||
|
||||
def cleanup():
|
||||
# Clean up function, just to be extra safe
|
||||
for proc in bot_procs.values():
|
||||
for entry in bot_procs.values():
|
||||
proc = entry[0]
|
||||
proc.terminate()
|
||||
proc.wait()
|
||||
|
||||
|
||||
atexit.register(cleanup)
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
aiohttp_session = aiohttp.ClientSession()
|
||||
daily_helpers["rest"] = DailyRESTHelper(
|
||||
daily_api_key=os.getenv("DAILY_API_KEY", ""),
|
||||
daily_api_url=os.getenv("DAILY_API_URL", 'https://api.daily.co/v1'),
|
||||
aiohttp_session=aiohttp_session
|
||||
)
|
||||
yield
|
||||
await aiohttp_session.close()
|
||||
cleanup()
|
||||
|
||||
|
||||
app = FastAPI()
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
@@ -39,45 +59,45 @@ app.add_middleware(
|
||||
@app.get("/start")
|
||||
async def start_agent(request: Request):
|
||||
print(f"!!! Creating room")
|
||||
room_url, room_name = _create_room()
|
||||
print(f"!!! Room URL: {room_url}")
|
||||
room = await daily_helpers["rest"].create_room(DailyRoomParams())
|
||||
print(f"!!! Room URL: {room.url}")
|
||||
# Ensure the room property is present
|
||||
if not room_url:
|
||||
if not room.url:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Missing 'room' property in request data. Cannot start agent without a target room!")
|
||||
|
||||
# Check if there is already an existing process running in this room
|
||||
num_bots_in_room = sum(
|
||||
1 for proc in bot_procs.values() if proc[1] == room_url and proc[0].poll() is None)
|
||||
1 for proc in bot_procs.values() if proc[1] == room.url and proc[0].poll() is None)
|
||||
if num_bots_in_room >= MAX_BOTS_PER_ROOM:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Max bot limited reach for room: {room_url}")
|
||||
status_code=500, detail=f"Max bot limited reach for room: {room.url}")
|
||||
|
||||
# Get the token for the room
|
||||
token = get_token(room_url)
|
||||
token = await daily_helpers["rest"].get_token(room.url)
|
||||
|
||||
if not token:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to get token for room: {room_url}")
|
||||
status_code=500, detail=f"Failed to get token for room: {room.url}")
|
||||
|
||||
# Spawn a new agent, and join the user session
|
||||
# Note: this is mostly for demonstration purposes (refer to 'deployment' in README)
|
||||
try:
|
||||
proc = subprocess.Popen(
|
||||
[
|
||||
f"python3 -m bot -u {room_url} -t {token}"
|
||||
f"python3 -m bot -u {room.url} -t {token}"
|
||||
],
|
||||
shell=True,
|
||||
bufsize=1,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__))
|
||||
)
|
||||
bot_procs[proc.pid] = (proc, room_url)
|
||||
bot_procs[proc.pid] = (proc, room.url)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
|
||||
return RedirectResponse(room_url)
|
||||
return RedirectResponse(room.url)
|
||||
|
||||
|
||||
@app.get("/status/{pid}")
|
||||
|
||||
@@ -1,109 +0,0 @@
|
||||
|
||||
import urllib.parse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
|
||||
daily_api_path = os.getenv("DAILY_API_URL") or "api.daily.co/v1"
|
||||
daily_api_key = os.getenv("DAILY_API_KEY")
|
||||
|
||||
|
||||
def create_room(timeout: float = 10.0) -> tuple[str, str]:
    """
    Helper function to create a Daily room.
    # See: https://docs.daily.co/reference/rest-api/rooms

    Args:
        timeout (float): Seconds to wait for the Daily API before giving up.
            Without a timeout the blocking request could hang the caller
            indefinitely if the API never answers.

    Returns:
        tuple: A tuple containing the room URL and room name.

    Raises:
        Exception: If the request to create the room fails or if the response does not contain the room URL or room name.
        requests.exceptions.Timeout: If the Daily API does not respond within ``timeout`` seconds.
    """
    room_props = {
        "exp": time.time() + 60 * 60,  # 1 hour
        "enable_chat": True,
        "enable_emoji_reactions": True,
        "eject_at_room_exp": True,
        "enable_prejoin_ui": False,  # Important for the bot to be able to join headlessly
    }
    res = requests.post(
        f"https://{daily_api_path}/rooms",
        headers={"Authorization": f"Bearer {daily_api_key}"},
        json={
            "properties": room_props
        },
        timeout=timeout,
    )
    if res.status_code != 200:
        raise Exception(f"Unable to create room: {res.text}")

    data = res.json()
    room_url: str = data.get("url")
    room_name: str = data.get("name")
    if room_url is None or room_name is None:
        raise Exception("Missing room URL or room name in response")

    return room_url, room_name
|
||||
|
||||
|
||||
def get_name_from_url(room_url: str) -> str:
    """
    Extracts the name from a given room URL.

    The name is the URL's path with surrounding slashes removed, so a
    trailing slash (``https://example.daily.co/room/``) no longer leaks into
    the returned name. Query strings and fragments are not part of the path
    and are therefore excluded.

    Args:
        room_url (str): The URL of the room.

    Returns:
        str: The extracted name from the room URL.
    """
    return urllib.parse.urlparse(room_url).path.strip("/")
|
||||
|
||||
|
||||
def get_token(room_url: str) -> str:
|
||||
"""
|
||||
Retrieves a meeting token for the specified Daily room URL.
|
||||
# See: https://docs.daily.co/reference/rest-api/meeting-tokens
|
||||
|
||||
Args:
|
||||
room_url (str): The URL of the Daily room.
|
||||
|
||||
Returns:
|
||||
str: The meeting token.
|
||||
|
||||
Raises:
|
||||
Exception: If no room URL is specified or if no Daily API key is specified.
|
||||
Exception: If there is an error creating the meeting token.
|
||||
"""
|
||||
if not room_url:
|
||||
raise Exception(
|
||||
"No Daily room specified. You must specify a Daily room in order a token to be generated.")
|
||||
|
||||
if not daily_api_key:
|
||||
raise Exception(
|
||||
"No Daily API key specified. set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
|
||||
expiration: float = time.time() + 60 * 60
|
||||
room_name = get_name_from_url(room_url)
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://{daily_api_path}/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {daily_api_key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True, # Owner tokens required for transcription
|
||||
"exp": expiration}},
|
||||
)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
|
||||
return token
|
||||
@@ -10,24 +10,14 @@ import os
|
||||
import sys
|
||||
import wave
|
||||
|
||||
from typing import List
|
||||
|
||||
from openai._types import NotGiven, NOT_GIVEN
|
||||
|
||||
from openai.types.chat import (
|
||||
ChatCompletionToolParam,
|
||||
)
|
||||
|
||||
from pipecat.frames.frames import AudioRawFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import LLMUserContextAggregator, LLMAssistantContextAggregator
|
||||
from pipecat.processors.logger import FrameLogger
|
||||
from pipecat.processors.frame_processor import FrameDirection
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMContext, OpenAILLMContextFrame, OpenAILLMService
|
||||
from pipecat.services.ai_services import AIService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
@@ -64,20 +54,11 @@ for file in sound_files:
|
||||
|
||||
|
||||
class IntakeProcessor:
|
||||
def __init__(
|
||||
self,
|
||||
context: OpenAILLMContext,
|
||||
llm: AIService,
|
||||
tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
|
||||
*args,
|
||||
**kwargs,
|
||||
):
|
||||
super().__init__(*args, **kwargs)
|
||||
self._context: OpenAILLMContext = context
|
||||
self._llm = llm
|
||||
|
||||
def __init__(self, context: OpenAILLMContext):
|
||||
print(f"Initializing context from IntakeProcessor")
|
||||
self._context.add_message({"role": "system", "content": "You are Jessica, an agent for a company called Tri-County Health Services. Your job is to collect important information from the user before their doctor visit. You're talking to Chad Bailey. You should address the user by their first name and be polite and professional. You're not a medical professional, so you shouldn't provide any advice. Keep your responses short. Your job is to collect information to give to a doctor. Don't make assumptions about what values to plug into functions. Ask for clarification if a user response is ambiguous. Start by introducing yourself. Then, ask the user to confirm their identity by telling you their birthday, including the year. When they answer with their birthday, call the verify_birthday function."})
|
||||
self._context.set_tools([
|
||||
context.add_message({"role": "system", "content": "You are Jessica, an agent for a company called Tri-County Health Services. Your job is to collect important information from the user before their doctor visit. You're talking to Chad Bailey. You should address the user by their first name and be polite and professional. You're not a medical professional, so you shouldn't provide any advice. Keep your responses short. Your job is to collect information to give to a doctor. Don't make assumptions about what values to plug into functions. Ask for clarification if a user response is ambiguous. Start by introducing yourself. Then, ask the user to confirm their identity by telling you their birthday, including the year. When they answer with their birthday, call the verify_birthday function."})
|
||||
context.set_tools([
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
@@ -93,18 +74,17 @@ class IntakeProcessor:
|
||||
},
|
||||
},
|
||||
}])
|
||||
# Create an allowlist of functions that the LLM can call
|
||||
self._functions = [
|
||||
"verify_birthday",
|
||||
"list_prescriptions",
|
||||
"list_allergies",
|
||||
"list_conditions",
|
||||
"list_visit_reasons",
|
||||
]
|
||||
|
||||
async def verify_birthday(self, llm, args):
|
||||
async def verify_birthday(
|
||||
self,
|
||||
function_name,
|
||||
tool_call_id,
|
||||
args,
|
||||
llm,
|
||||
context,
|
||||
result_callback):
|
||||
if args["birthday"] == "1983-01-01":
|
||||
self._context.set_tools(
|
||||
context.set_tools(
|
||||
[
|
||||
{
|
||||
"type": "function",
|
||||
@@ -134,18 +114,18 @@ class IntakeProcessor:
|
||||
},
|
||||
}])
|
||||
# It's a bit weird to push this to the LLM, but it gets it into the pipeline
|
||||
await llm.push_frame(sounds["ding2.wav"], FrameDirection.DOWNSTREAM)
|
||||
# await llm.push_frame(sounds["ding2.wav"], FrameDirection.DOWNSTREAM)
|
||||
# We don't need the function call in the context, so just return a new
|
||||
# system message and let the framework re-prompt
|
||||
return [{"role": "system", "content": "Next, thank the user for confirming their identity, then ask the user to list their current prescriptions. Each prescription needs to have a medication name and a dosage. Do not call the list_prescriptions function with any unknown dosages."}]
|
||||
await result_callback([{"role": "system", "content": "Next, thank the user for confirming their identity, then ask the user to list their current prescriptions. Each prescription needs to have a medication name and a dosage. Do not call the list_prescriptions function with any unknown dosages."}])
|
||||
else:
|
||||
# The user provided an incorrect birthday; ask them to try again
|
||||
return [{"role": "system", "content": "The user provided an incorrect birthday. Ask them for their birthday again. When they answer, call the verify_birthday function."}]
|
||||
await result_callback([{"role": "system", "content": "The user provided an incorrect birthday. Ask them for their birthday again. When they answer, call the verify_birthday function."}])
|
||||
|
||||
async def start_prescriptions(self, llm):
|
||||
async def start_prescriptions(self, function_name, llm, context):
|
||||
print(f"!!! doing start prescriptions")
|
||||
# Move on to allergies
|
||||
self._context.set_tools(
|
||||
context.set_tools(
|
||||
[
|
||||
{
|
||||
"type": "function",
|
||||
@@ -169,18 +149,18 @@ class IntakeProcessor:
|
||||
},
|
||||
},
|
||||
}])
|
||||
self._context.add_message(
|
||||
context.add_message(
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Next, ask the user if they have any allergies. Once they have listed their allergies or confirmed they don't have any, call the list_allergies function."})
|
||||
print(f"!!! about to await llm process frame in start prescrpitions")
|
||||
await llm.process_frame(OpenAILLMContextFrame(self._context), FrameDirection.DOWNSTREAM)
|
||||
await llm.process_frame(OpenAILLMContextFrame(context), FrameDirection.DOWNSTREAM)
|
||||
print(f"!!! past await process frame in start prescriptions")
|
||||
|
||||
async def start_allergies(self, llm):
|
||||
async def start_allergies(self, function_name, llm, context):
|
||||
print("!!! doing start allergies")
|
||||
# Move on to conditions
|
||||
self._context.set_tools(
|
||||
context.set_tools(
|
||||
[
|
||||
{
|
||||
"type": "function",
|
||||
@@ -205,16 +185,16 @@ class IntakeProcessor:
|
||||
},
|
||||
},
|
||||
])
|
||||
self._context.add_message(
|
||||
context.add_message(
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Now ask the user if they have any medical conditions the doctor should know about. Once they've answered the question, call the list_conditions function."})
|
||||
await llm.process_frame(OpenAILLMContextFrame(self._context), FrameDirection.DOWNSTREAM)
|
||||
await llm.process_frame(OpenAILLMContextFrame(context), FrameDirection.DOWNSTREAM)
|
||||
|
||||
async def start_conditions(self, llm):
|
||||
async def start_conditions(self, function_name, llm, context):
|
||||
print("!!! doing start conditions")
|
||||
# Move on to visit reasons
|
||||
self._context.set_tools(
|
||||
context.set_tools(
|
||||
[
|
||||
{
|
||||
"type": "function",
|
||||
@@ -238,36 +218,37 @@ class IntakeProcessor:
|
||||
},
|
||||
},
|
||||
}])
|
||||
self._context.add_message(
|
||||
{"role": "system", "content": "Finally, ask the user the reason for their doctor visit today. Once they answer, call the list_visit_reasons function."})
|
||||
await llm.process_frame(OpenAILLMContextFrame(self._context), FrameDirection.DOWNSTREAM)
|
||||
context.add_message(
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Finally, ask the user the reason for their doctor visit today. Once they answer, call the list_visit_reasons function."})
|
||||
await llm.process_frame(OpenAILLMContextFrame(context), FrameDirection.DOWNSTREAM)
|
||||
|
||||
async def start_visit_reasons(self, llm):
|
||||
async def start_visit_reasons(self, function_name, llm, context):
|
||||
print("!!! doing start visit reasons")
|
||||
# move to finish call
|
||||
self._context.set_tools([])
|
||||
self._context.add_message({"role": "system",
|
||||
"content": "Now, thank the user and end the conversation."})
|
||||
await llm.process_frame(OpenAILLMContextFrame(self._context), FrameDirection.DOWNSTREAM)
|
||||
context.set_tools([])
|
||||
context.add_message({"role": "system",
|
||||
"content": "Now, thank the user and end the conversation."})
|
||||
await llm.process_frame(OpenAILLMContextFrame(context), FrameDirection.DOWNSTREAM)
|
||||
|
||||
async def save_data(self, llm, args):
|
||||
async def save_data(self, function_name, tool_call_id, args, llm, context, result_callback):
|
||||
logger.info(f"!!! Saving data: {args}")
|
||||
# Since this is supposed to be "async", returning None from the callback
|
||||
# will prevent adding anything to context or re-prompting
|
||||
return None
|
||||
await result_callback(None)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=576,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
@@ -282,31 +263,25 @@ async def main(room_url: str, token):
|
||||
)
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
#
|
||||
# English
|
||||
#
|
||||
voice_id="pNInz6obpgDQGcFmaJgB",
|
||||
|
||||
#
|
||||
# Spanish
|
||||
#
|
||||
# model="eleven_multilingual_v2",
|
||||
# voice_id="gD1IexrzCvsXPHUuT0s3",
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
# tts = CartesiaTTSService(
|
||||
# api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
# voice_id="846d6cb0-2301-48b6-9683-48f5618ea2f6", # Spanish-speaking Lady
|
||||
# )
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o")
|
||||
|
||||
messages = []
|
||||
context = OpenAILLMContext(messages=messages)
|
||||
user_context = LLMUserContextAggregator(context)
|
||||
assistant_context = LLMAssistantContextAggregator(context)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
intake = IntakeProcessor(context, llm)
|
||||
intake = IntakeProcessor(context)
|
||||
llm.register_function("verify_birthday", intake.verify_birthday)
|
||||
llm.register_function(
|
||||
"list_prescriptions",
|
||||
@@ -329,12 +304,12 @@ async def main(room_url: str, token):
|
||||
|
||||
pipeline = Pipeline([
|
||||
transport.input(), # Transport input
|
||||
user_context, # User responses
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
fl, # Frame logger
|
||||
tts, # TTS
|
||||
transport.output(), # Transport output
|
||||
assistant_context, # Assistant responses
|
||||
context_aggregator.assistant(), # Assistant responses
|
||||
])
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=False))
|
||||
@@ -351,5 +326,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
python-dotenv
|
||||
requests
|
||||
fastapi[all]
|
||||
uvicorn
|
||||
pipecat-ai[daily,openai,silero]
|
||||
|
||||
@@ -1,11 +1,17 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
|
||||
|
||||
|
||||
def configure():
|
||||
async def configure(aiohttp_session: aiohttp.ClientSession):
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
parser.add_argument(
|
||||
"-u",
|
||||
@@ -33,26 +39,15 @@ def configure():
|
||||
if not key:
|
||||
raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
|
||||
daily_rest_helper = DailyRESTHelper(
|
||||
daily_api_key=key,
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session)
|
||||
|
||||
# Create a meeting token for the given room with an expiration 1 hour in
|
||||
# the future.
|
||||
room_name: str = urllib.parse.urlparse(url).path[1:]
|
||||
expiration: float = time.time() + 60 * 60
|
||||
expiry_time: float = 60 * 60
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://api.daily.co/v1/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True,
|
||||
"exp": expiration}},
|
||||
)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
token = await daily_rest_helper.get_token(url, expiry_time)
|
||||
|
||||
return (url, token)
|
||||
|
||||
@@ -1,31 +1,51 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
import atexit
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse, RedirectResponse
|
||||
|
||||
from utils.daily_helpers import create_room as _create_room, get_token
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
|
||||
|
||||
MAX_BOTS_PER_ROOM = 1
|
||||
|
||||
# Bot sub-process dict for status reporting and concurrency control
|
||||
bot_procs = {}
|
||||
|
||||
daily_helpers = {}
|
||||
|
||||
|
||||
def cleanup():
|
||||
# Clean up function, just to be extra safe
|
||||
for proc in bot_procs.values():
|
||||
for entry in bot_procs.values():
|
||||
proc = entry[0]
|
||||
proc.terminate()
|
||||
proc.wait()
|
||||
|
||||
|
||||
atexit.register(cleanup)
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
aiohttp_session = aiohttp.ClientSession()
|
||||
daily_helpers["rest"] = DailyRESTHelper(
|
||||
daily_api_key=os.getenv("DAILY_API_KEY", ""),
|
||||
daily_api_url=os.getenv("DAILY_API_URL", 'https://api.daily.co/v1'),
|
||||
aiohttp_session=aiohttp_session
|
||||
)
|
||||
yield
|
||||
await aiohttp_session.close()
|
||||
cleanup()
|
||||
|
||||
|
||||
app = FastAPI()
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
@@ -39,45 +59,45 @@ app.add_middleware(
|
||||
@app.get("/start")
|
||||
async def start_agent(request: Request):
|
||||
print(f"!!! Creating room")
|
||||
room_url, room_name = _create_room()
|
||||
print(f"!!! Room URL: {room_url}")
|
||||
room = await daily_helpers["rest"].create_room(DailyRoomParams())
|
||||
print(f"!!! Room URL: {room.url}")
|
||||
# Ensure the room property is present
|
||||
if not room_url:
|
||||
if not room.url:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Missing 'room' property in request data. Cannot start agent without a target room!")
|
||||
|
||||
# Check if there is already an existing process running in this room
|
||||
num_bots_in_room = sum(
|
||||
1 for proc in bot_procs.values() if proc[1] == room_url and proc[0].poll() is None)
|
||||
1 for proc in bot_procs.values() if proc[1] == room.url and proc[0].poll() is None)
|
||||
if num_bots_in_room >= MAX_BOTS_PER_ROOM:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Max bot limited reach for room: {room_url}")
|
||||
status_code=500, detail=f"Max bot limited reach for room: {room.url}")
|
||||
|
||||
# Get the token for the room
|
||||
token = get_token(room_url)
|
||||
token = await daily_helpers["rest"].get_token(room.url)
|
||||
|
||||
if not token:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to get token for room: {room_url}")
|
||||
status_code=500, detail=f"Failed to get token for room: {room.url}")
|
||||
|
||||
# Spawn a new agent, and join the user session
|
||||
# Note: this is mostly for demonstration purposes (refer to 'deployment' in README)
|
||||
try:
|
||||
proc = subprocess.Popen(
|
||||
[
|
||||
f"python3 -m bot -u {room_url} -t {token}"
|
||||
f"python3 -m bot -u {room.url} -t {token}"
|
||||
],
|
||||
shell=True,
|
||||
bufsize=1,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__))
|
||||
)
|
||||
bot_procs[proc.pid] = (proc, room_url)
|
||||
bot_procs[proc.pid] = (proc, room.url)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
|
||||
return RedirectResponse(room_url)
|
||||
return RedirectResponse(room.url)
|
||||
|
||||
|
||||
@app.get("/status/{pid}")
|
||||
|
||||
@@ -1,109 +0,0 @@
|
||||
|
||||
import urllib.parse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
|
||||
daily_api_path = os.getenv("DAILY_API_URL") or "api.daily.co/v1"
|
||||
daily_api_key = os.getenv("DAILY_API_KEY")
|
||||
|
||||
|
||||
def create_room() -> tuple[str, str]:
|
||||
"""
|
||||
Helper function to create a Daily room.
|
||||
# See: https://docs.daily.co/reference/rest-api/rooms
|
||||
|
||||
Returns:
|
||||
tuple: A tuple containing the room URL and room name.
|
||||
|
||||
Raises:
|
||||
Exception: If the request to create the room fails or if the response does not contain the room URL or room name.
|
||||
"""
|
||||
room_props = {
|
||||
"exp": time.time() + 60 * 60, # 1 hour
|
||||
"enable_chat": True,
|
||||
"enable_emoji_reactions": True,
|
||||
"eject_at_room_exp": True,
|
||||
"enable_prejoin_ui": False, # Important for the bot to be able to join headlessly
|
||||
}
|
||||
res = requests.post(
|
||||
f"https://{daily_api_path}/rooms",
|
||||
headers={"Authorization": f"Bearer {daily_api_key}"},
|
||||
json={
|
||||
"properties": room_props
|
||||
},
|
||||
)
|
||||
if res.status_code != 200:
|
||||
raise Exception(f"Unable to create room: {res.text}")
|
||||
|
||||
data = res.json()
|
||||
room_url: str = data.get("url")
|
||||
room_name: str = data.get("name")
|
||||
if room_url is None or room_name is None:
|
||||
raise Exception("Missing room URL or room name in response")
|
||||
|
||||
return room_url, room_name
|
||||
|
||||
|
||||
def get_name_from_url(room_url: str) -> str:
|
||||
"""
|
||||
Extracts the name from a given room URL.
|
||||
|
||||
Args:
|
||||
room_url (str): The URL of the room.
|
||||
|
||||
Returns:
|
||||
str: The extracted name from the room URL.
|
||||
"""
|
||||
return urllib.parse.urlparse(room_url).path[1:]
|
||||
|
||||
|
||||
def get_token(room_url: str) -> str:
|
||||
"""
|
||||
Retrieves a meeting token for the specified Daily room URL.
|
||||
# See: https://docs.daily.co/reference/rest-api/meeting-tokens
|
||||
|
||||
Args:
|
||||
room_url (str): The URL of the Daily room.
|
||||
|
||||
Returns:
|
||||
str: The meeting token.
|
||||
|
||||
Raises:
|
||||
Exception: If no room URL is specified or if no Daily API key is specified.
|
||||
Exception: If there is an error creating the meeting token.
|
||||
"""
|
||||
if not room_url:
|
||||
raise Exception(
|
||||
"No Daily room specified. You must specify a Daily room in order a token to be generated.")
|
||||
|
||||
if not daily_api_key:
|
||||
raise Exception(
|
||||
"No Daily API key specified. set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
|
||||
expiration: float = time.time() + 60 * 60
|
||||
room_name = get_name_from_url(room_url)
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://{daily_api_path}/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {daily_api_key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True, # Owner tokens required for transcription
|
||||
"exp": expiration}},
|
||||
)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
|
||||
token: str = res.json()["token"]
|
||||
|
||||
return token
|
||||
@@ -1,3 +1,9 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import os
|
||||
@@ -20,7 +26,7 @@ from pipecat.frames.frames import (
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTranscriptionSettings, DailyTransport
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
from runner import configure
|
||||
@@ -77,8 +83,10 @@ class TalkingAnimation(FrameProcessor):
|
||||
await self.push_frame(frame)
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -165,5 +173,4 @@ async def main(room_url: str, token):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
python-dotenv
|
||||
requests
|
||||
fastapi[all]
|
||||
uvicorn
|
||||
pipecat-ai[daily,openai,silero]
|
||||
|
||||
@@ -1,11 +1,17 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
|
||||
|
||||
|
||||
def configure():
|
||||
async def configure(aiohttp_session: aiohttp.ClientSession):
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
parser.add_argument(
|
||||
"-u",
|
||||
@@ -33,26 +39,16 @@ def configure():
|
||||
if not key:
|
||||
raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
|
||||
|
||||
# Create a meeting token for the given room with an expiration 1 hour in
|
||||
# the future.
|
||||
room_name: str = urllib.parse.urlparse(url).path[1:]
|
||||
expiration: float = time.time() + 60 * 60
|
||||
|
||||
res: requests.Response = requests.post(
|
||||
f"https://api.daily.co/v1/meeting-tokens",
|
||||
headers={
|
||||
"Authorization": f"Bearer {key}"},
|
||||
json={
|
||||
"properties": {
|
||||
"room_name": room_name,
|
||||
"is_owner": True,
|
||||
"exp": expiration}},
|
||||
daily_rest_helper = DailyRESTHelper(
|
||||
daily_api_key=key,
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session
|
||||
)
|
||||
|
||||
if res.status_code != 200:
|
||||
raise Exception(
|
||||
f"Failed to create meeting token: {res.status_code} {res.text}")
|
||||
# Create a meeting token for the given room with an expiration 1 hour in
|
||||
# the future.
|
||||
expiry_time: float = 60 * 60
|
||||
|
||||
token: str = res.json()["token"]
|
||||
token = await daily_rest_helper.get_token(url, expiry_time)
|
||||
|
||||
return (url, token)
|
||||
|
||||
@@ -1,31 +1,51 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
import atexit
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse, RedirectResponse
|
||||
|
||||
from utils.daily_helpers import create_room as _create_room, get_token
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
|
||||
|
||||
MAX_BOTS_PER_ROOM = 1
|
||||
|
||||
# Bot sub-process dict for status reporting and concurrency control
|
||||
bot_procs = {}
|
||||
|
||||
daily_helpers = {}
|
||||
|
||||
|
||||
def cleanup():
|
||||
# Clean up function, just to be extra safe
|
||||
for proc in bot_procs.values():
|
||||
for entry in bot_procs.values():
|
||||
proc = entry[0]
|
||||
proc.terminate()
|
||||
proc.wait()
|
||||
|
||||
|
||||
atexit.register(cleanup)
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
aiohttp_session = aiohttp.ClientSession()
|
||||
daily_helpers["rest"] = DailyRESTHelper(
|
||||
daily_api_key=os.getenv("DAILY_API_KEY", ""),
|
||||
daily_api_url=os.getenv("DAILY_API_URL", 'https://api.daily.co/v1'),
|
||||
aiohttp_session=aiohttp_session
|
||||
)
|
||||
yield
|
||||
await aiohttp_session.close()
|
||||
cleanup()
|
||||
|
||||
|
||||
app = FastAPI()
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
@@ -39,45 +59,45 @@ app.add_middleware(
|
||||
@app.get("/start")
|
||||
async def start_agent(request: Request):
|
||||
print(f"!!! Creating room")
|
||||
room_url, room_name = _create_room()
|
||||
print(f"!!! Room URL: {room_url}")
|
||||
room = await daily_helpers["rest"].create_room(DailyRoomParams())
|
||||
print(f"!!! Room URL: {room.url}")
|
||||
# Ensure the room property is present
|
||||
if not room_url:
|
||||
if not room.url:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Missing 'room' property in request data. Cannot start agent without a target room!")
|
||||
|
||||
# Check if there is already an existing process running in this room
|
||||
num_bots_in_room = sum(
|
||||
1 for proc in bot_procs.values() if proc[1] == room_url and proc[0].poll() is None)
|
||||
1 for proc in bot_procs.values() if proc[1] == room.url and proc[0].poll() is None)
|
||||
if num_bots_in_room >= MAX_BOTS_PER_ROOM:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Max bot limited reach for room: {room_url}")
|
||||
status_code=500, detail=f"Max bot limited reach for room: {room.url}")
|
||||
|
||||
# Get the token for the room
|
||||
token = get_token(room_url)
|
||||
token = await daily_helpers["rest"].get_token(room.url)
|
||||
|
||||
if not token:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to get token for room: {room_url}")
|
||||
status_code=500, detail=f"Failed to get token for room: {room.url}")
|
||||
|
||||
# Spawn a new agent, and join the user session
|
||||
# Note: this is mostly for demonstration purposes (refer to 'deployment' in README)
|
||||
try:
|
||||
proc = subprocess.Popen(
|
||||
[
|
||||
f"python3 -m bot -u {room_url} -t {token}"
|
||||
f"python3 -m bot -u {room.url} -t {token}"
|
||||
],
|
||||
shell=True,
|
||||
bufsize=1,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__))
|
||||
)
|
||||
bot_procs[proc.pid] = (proc, room_url)
|
||||
bot_procs[proc.pid] = (proc, room.url)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to start subprocess: {e}")
|
||||
|
||||
return RedirectResponse(room_url)
|
||||
return RedirectResponse(room.url)
|
||||
|
||||
|
||||
@app.get("/status/{pid}")
|
||||
|
||||
@@ -1,109 +0,0 @@
|
||||
|
||||
import urllib.parse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
|
||||
daily_api_path = os.getenv("DAILY_API_URL") or "api.daily.co/v1"
|
||||
daily_api_key = os.getenv("DAILY_API_KEY")
|
||||
|
||||
|
||||
def create_room() -> tuple[str, str]:
|
||||
"""
|
||||
Helper function to create a Daily room.
|
||||
# See: https://docs.daily.co/reference/rest-api/rooms
|
||||
|
||||
Returns:
|
||||
tuple: A tuple containing the room URL and room name.
|
||||
|
||||
Raises:
|
||||
Exception: If the request to create the room fails or if the response does not contain the room URL or room name.
|
||||
"""
|
||||
room_props = {
|
||||
"exp": time.time() + 60 * 60, # 1 hour
|
||||
"enable_chat": True,
|
||||
"enable_emoji_reactions": True,
|
||||
"eject_at_room_exp": True,
|
||||
"enable_prejoin_ui": False, # Important for the bot to be able to join headlessly
|
||||
}
|
||||
res = requests.post(
|
||||
f"https://{daily_api_path}/rooms",
|
||||
headers={"Authorization": f"Bearer {daily_api_key}"},
|
||||
json={
|
||||
"properties": room_props
|
||||
},
|
||||
)
|
||||
if res.status_code != 200:
|
||||
raise Exception(f"Unable to create room: {res.text}")
|
||||
|
||||
data = res.json()
|
||||
room_url: str = data.get("url")
|
||||
room_name: str = data.get("name")
|
||||
if room_url is None or room_name is None:
|
||||
raise Exception("Missing room URL or room name in response")
|
||||
|
||||
return room_url, room_name
|
||||
|
||||
|
||||
def get_name_from_url(room_url: str) -> str:
    """
    Return the room name encoded in a room URL.

    The name is the URL's path component with its first character (the
    leading slash) removed; query string and fragment are not included.

    Args:
        room_url (str): The URL of the room.

    Returns:
        str: The extracted name from the room URL.
    """
    url_path = urllib.parse.urlparse(room_url).path
    return url_path[1:]
|
||||
|
||||
|
||||
def get_token(room_url: str) -> str:
    """
    Retrieves a meeting token for the specified Daily room URL.
    # See: https://docs.daily.co/reference/rest-api/meeting-tokens

    The token is an owner token that expires one hour from now.

    Args:
        room_url (str): The URL of the Daily room.

    Returns:
        str: The meeting token.

    Raises:
        Exception: If no room URL is specified or if no Daily API key is specified.
        Exception: If there is an error creating the meeting token.
        requests.exceptions.RequestException: If the HTTP request cannot be completed or times out.
    """
    if not room_url:
        raise Exception(
            "No Daily room specified. You must specify a Daily room in order a token to be generated.")

    if not daily_api_key:
        raise Exception(
            "No Daily API key specified. set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")

    expiration: float = time.time() + 60 * 60
    room_name = get_name_from_url(room_url)

    res: requests.Response = requests.post(
        f"https://{daily_api_path}/meeting-tokens",
        headers={"Authorization": f"Bearer {daily_api_key}"},
        json={
            "properties": {
                "room_name": room_name,
                "is_owner": True,  # Owner tokens required for transcription
                "exp": expiration,
            }
        },
        # requests never times out by default; bound the wait so a stalled
        # connection cannot hang the caller forever.
        timeout=30,
    )

    if res.status_code != 200:
        raise Exception(
            f"Failed to create meeting token: {res.status_code} {res.text}")

    token: str = res.json()["token"]

    return token
|
||||
@@ -1,4 +1,4 @@
|
||||
FROM python:3.11-bullseye
|
||||
FROM python:3.11-slim-bookworm
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
ARG USE_PERSISTENT_DATA
|
||||
@@ -51,4 +51,4 @@ COPY --chown=user ./frontend/ frontend/
|
||||
RUN cd frontend && npm install && npm run build
|
||||
|
||||
# Start the FastAPI server
|
||||
CMD python3 src/server.py --port ${FAST_API_PORT}
|
||||
CMD python3 src/bot_runner.py --port ${FAST_API_PORT}
|
||||
@@ -48,6 +48,8 @@ pip install -r requirements.txt
|
||||
mv env.example .env
|
||||
```
|
||||
|
||||
When deploying to production, to ensure only this app can spawn a new bot, set your `ENV` to `production`
|
||||
|
||||
**Build the frontend:**
|
||||
|
||||
This project uses a custom frontend, which needs to be built. Note: this is done automatically as part of the Docker deployment.
|
||||
@@ -64,11 +66,11 @@ The build UI files can be found in `frontend/out`
|
||||
|
||||
Start the API / bot manager:
|
||||
|
||||
`python src/server.py`
|
||||
`python src/bot_runner.py`
|
||||
|
||||
If you'd like to run a custom domain or port:
|
||||
|
||||
`python src/server.py --host somehost --p 7777`
|
||||
`python src/bot_runner.py --host somehost --p someport`
|
||||
|
||||
➡️ Open the host URL in your browser `http://localhost:7860`
|
||||
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
DAILY_API_KEY=7df...
|
||||
ELEVENLABS_API_KEY=aeb...
|
||||
ELEVENLABS_VOICE_ID=7S...
|
||||
FAL_KEY=8c...
|
||||
OPENAI_API_KEY=sk-PL...
|
||||
DAILY_API_KEY=
|
||||
DAILY_SAMPLE_ROOM_URL=
|
||||
ELEVENLABS_API_KEY=
|
||||
ELEVENLABS_VOICE_ID=
|
||||
FAL_KEY=
|
||||
OPENAI_API_KEY=
|
||||
|
||||
ENV= # dev | production
|
||||
RUN_AS_VM= # Set this to launch each bot in a new VM; leave it empty to run bots as local subprocesses
|
||||
@@ -27,14 +27,11 @@ export default function Call() {
|
||||
|
||||
// Create a new room for the story session
|
||||
try {
|
||||
const response = await fetch("/create", {
|
||||
const response = await fetch("/start_bot", {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
room_url: process.env.NEXT_PUBLIC_ROOM_URL || null,
|
||||
}),
|
||||
});
|
||||
|
||||
const { room_url, token } = await response.json();
|
||||
@@ -55,21 +52,9 @@ export default function Call() {
|
||||
// Disable local audio, the bot will say hello first
|
||||
daily.setLocalAudio(false);
|
||||
|
||||
// Start the bot
|
||||
const resp = await fetch("/start", {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
room_url,
|
||||
}),
|
||||
});
|
||||
|
||||
setState("started");
|
||||
} catch (error) {
|
||||
setState("error");
|
||||
leave();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,7 +64,13 @@ export default function Call() {
|
||||
}
|
||||
|
||||
if (state === "error") {
|
||||
return <div>An Error occured</div>;
|
||||
return (
|
||||
<div className="flex items-center mx-auto">
|
||||
<p className="text-red-500 font-semibold bg-white px-4 py-2 shadow-xl rounded-lg">
|
||||
This demo is currently at capacity. Please try again later.
|
||||
</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
if (state === "started") {
|
||||
|
||||
@@ -108,26 +108,26 @@ export default function DevicePicker({}: Props) {
|
||||
{hasMicError && (
|
||||
<div className="error">
|
||||
{micState === "blocked" ? (
|
||||
<p>
|
||||
<p className="text-red-500">
|
||||
Please check your browser and system permissions. Make sure that
|
||||
this app is allowed to access your microphone.
|
||||
</p>
|
||||
) : micState === "in-use" ? (
|
||||
<p>
|
||||
<p className="text-red-500">
|
||||
Your microphone is being used by another app. Please close any
|
||||
other apps using your microphone and restart this app.
|
||||
</p>
|
||||
) : micState === "not-found" ? (
|
||||
<p>
|
||||
<p className="text-red-500">
|
||||
No microphone seems to be connected. Please connect a microphone.
|
||||
</p>
|
||||
) : micState === "not-supported" ? (
|
||||
<p>
|
||||
<p className="text-red-500">
|
||||
This app is not supported on your device. Please update your
|
||||
software or use a different device.
|
||||
</p>
|
||||
) : (
|
||||
<p>
|
||||
<p className="text-red-500">
|
||||
There seems to be an issue accessing your microphone. Try
|
||||
restarting the app or consult a system administrator.
|
||||
</p>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import React from "react";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import DevicePicker from "@/components/DevicePicker";
|
||||
import { IconEar, IconLoader2 } from "@tabler/icons-react";
|
||||
import { IconAlertCircle, IconEar, IconLoader2 } from "@tabler/icons-react";
|
||||
|
||||
type SetupProps = {
|
||||
handleStart: () => void;
|
||||
@@ -24,7 +24,6 @@ export const Setup: React.FC<SetupProps> = ({ handleStart }) => {
|
||||
<h1 className="text-4xl font-bold text-pretty tracking-tighter mb-4">
|
||||
Welcome to <span className="text-sky-500">Storytime</span>
|
||||
</h1>
|
||||
|
||||
{state === "intro" ? (
|
||||
<>
|
||||
<p className="text-gray-600 leading-relaxed text-pretty">
|
||||
@@ -38,6 +37,9 @@ export const Setup: React.FC<SetupProps> = ({ handleStart }) => {
|
||||
<IconEar size={24} /> For best results, try in a quiet
|
||||
environment!
|
||||
</p>
|
||||
<p className="flex flex-row gap-2 text-gray-600 font-medium text-red-500">
|
||||
<IconAlertCircle size={24} /> This demo expires after 5 minutes.
|
||||
</p>
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
@@ -49,7 +51,6 @@ export const Setup: React.FC<SetupProps> = ({ handleStart }) => {
|
||||
<DevicePicker />
|
||||
</>
|
||||
)}
|
||||
|
||||
<hr className="border-gray-150 my-2" />
|
||||
|
||||
<Button
|
||||
|
||||
@@ -1,2 +1 @@
|
||||
NEXT_PUBLIC_ROOM_URL=
|
||||
SITE_URL=
|
||||
6240
examples/storytelling-chatbot/frontend/package-lock.json
generated
Normal file
6240
examples/storytelling-chatbot/frontend/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,5 @@
|
||||
async_timeout
|
||||
fastapi
|
||||
uvicorn
|
||||
requests
|
||||
python-dotenv
|
||||
pipecat-ai[daily,openai,fal]
|
||||
|
||||
@@ -5,7 +5,7 @@ import os
|
||||
import sys
|
||||
|
||||
|
||||
from pipecat.frames.frames import LLMMessagesFrame, StopTaskFrame
|
||||
from pipecat.frames.frames import LLMMessagesFrame, StopTaskFrame, EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
@@ -139,6 +139,16 @@ async def main(room_url, token=None):
|
||||
|
||||
main_task = PipelineTask(main_pipeline)
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
intro_task.queue_frame(EndFrame())
|
||||
await main_task.queue_frame(EndFrame())
|
||||
|
||||
@transport.event_handler("on_call_state_updated")
|
||||
async def on_call_state_updated(transport, state):
|
||||
if state == "left":
|
||||
await main_task.queue_frame(EndFrame())
|
||||
|
||||
await runner.run(main_task)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
251
examples/storytelling-chatbot/src/bot_runner.py
Normal file
251
examples/storytelling-chatbot/src/bot_runner.py
Normal file
@@ -0,0 +1,251 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import aiohttp
|
||||
import argparse
|
||||
import subprocess
|
||||
import os
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.responses import FileResponse, JSONResponse
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import (
|
||||
DailyRESTHelper, DailyRoomObject, DailyRoomProperties, DailyRoomParams)
|
||||
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(override=True)
|
||||
|
||||
# ------------ Fast API Config ------------ #
|
||||
|
||||
MAX_SESSION_TIME = 5 * 60 # 5 minutes
|
||||
|
||||
daily_helpers = {}
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Application lifespan: set up and tear down the shared Daily REST helper.

    On startup an aiohttp session is opened and a DailyRESTHelper built on it
    is stored under daily_helpers["rest"]. The API key and URL are read from
    DAILY_API_KEY and DAILY_API_URL. On shutdown the session is closed.

    Args:
        app (FastAPI): The application this lifespan is attached to (unused).
    """
    # `async with` closes the session even when shutdown is triggered by an
    # exception thrown into the generator at the `yield`.
    async with aiohttp.ClientSession() as aiohttp_session:
        daily_helpers["rest"] = DailyRESTHelper(
            daily_api_key=os.getenv("DAILY_API_KEY", ""),
            daily_api_url=os.getenv("DAILY_API_URL", 'https://api.daily.co/v1'),
            aiohttp_session=aiohttp_session
        )
        yield
|
||||
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Mount the static directory
|
||||
STATIC_DIR = "frontend/out"
|
||||
|
||||
|
||||
# ------------ Fast API Routes ------------ #
|
||||
|
||||
app.mount("/static", StaticFiles(directory=STATIC_DIR, html=True), name="static")
|
||||
|
||||
|
||||
@app.post("/start_bot")
async def start_bot(request: Request) -> JSONResponse:
    """
    Provision a room, start a bot in it, and return join details for the user.

    Steps:
      1. When ENV is "production", reject requests whose Host header is not
         one of the allowed domains.
      2. Answer webhook verification requests (JSON body containing "test").
      3. Use the room in DAILY_SAMPLE_ROOM_URL if set, otherwise create one.
      4. Get a token for the bot and start it, either through
         virtualize_bot (when RUN_AS_VM is set) or as a local subprocess.
      5. Get a second token for the user.

    Args:
        request (Request): The incoming request; its body is optional.

    Returns:
        JSONResponse: {"room_url": ..., "token": ...} for the user, or
        {"test": True} for a webhook verification request.

    Raises:
        HTTPException: 403 when the host is not allowed in production; 500
            when the room, a token, or the bot cannot be provisioned.
    """
    if os.getenv("ENV", "dev") == "production":
        # Only allow requests from the specified domain
        host_header = request.headers.get("host")
        allowed_domains = ["storytelling-chatbot.fly.dev", "www.storytelling-chatbot.fly.dev"]
        # Check if the Host header matches the allowed domain
        if host_header not in allowed_domains:
            raise HTTPException(status_code=403, detail="Access denied")

    try:
        data = await request.json()
        # Is this a webhook creation request?
        if "test" in data:
            return JSONResponse({"test": True})
    except Exception:
        # Deliberate best effort: the body is optional, so a missing or
        # non-JSON body simply means this is a normal start request.
        pass

    # Use specified room URL, or create a new one if not specified
    room_url = os.getenv("DAILY_SAMPLE_ROOM_URL", "")

    if not room_url:
        params = DailyRoomParams(
            properties=DailyRoomProperties()
        )
        try:
            room: DailyRoomObject = await daily_helpers["rest"].create_room(params=params)
        except Exception as e:
            raise HTTPException(
                status_code=500,
                detail=f"Unable to provision room {e}")
    else:
        # Check passed room URL exists, we should assume that it already has a sip set up
        try:
            room = await daily_helpers["rest"].get_room_from_url(room_url)
        except Exception:
            raise HTTPException(
                status_code=500, detail=f"Room not found: {room_url}")

    # Give the agent a token to join the session. Report room.url in errors:
    # the env-provided room_url is empty whenever the room was just created.
    try:
        token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Failed to get token for room: {room.url}") from e

    if not token:
        raise HTTPException(
            status_code=500, detail=f"Failed to get token for room: {room.url}")

    # Launch a new VM, or run as a shell process (not recommended)
    if os.getenv("RUN_AS_VM", False):
        try:
            await virtualize_bot(room.url, token)
        except Exception as e:
            raise HTTPException(
                status_code=500, detail=f"Failed to spawn VM: {e}")
    else:
        try:
            # Pass an argument vector instead of a shell string so the URL
            # and token are never interpreted by a shell.
            subprocess.Popen(
                ["python3", "-m", "bot", "-u", room.url, "-t", token],
                bufsize=1,
                cwd=os.path.dirname(os.path.abspath(__file__)))
        except Exception as e:
            raise HTTPException(
                status_code=500, detail=f"Failed to start subprocess: {e}")

    # Grab a token for the user to join with
    try:
        user_token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Failed to get token for room: {room.url}") from e

    return JSONResponse({
        "room_url": room.url,
        "token": user_token,
    })
|
||||
|
||||
|
||||
@app.get("/{path_name:path}", response_class=FileResponse)
async def catch_all(path_name: Optional[str] = ""):
    """
    Serve files of the built frontend for any path no other route handles.

    An empty path serves index.html. Otherwise the path is looked up under
    STATIC_DIR, first as given and then with its suffix replaced by ".html".

    Args:
        path_name (Optional[str]): Request path relative to the site root.

    Returns:
        FileResponse: The matching file.

    Raises:
        HTTPException: 450 when no file matches or when the path resolves
            outside STATIC_DIR.
    """
    if path_name == "":
        return FileResponse(f"{STATIC_DIR}/index.html")

    static_root = Path(STATIC_DIR).resolve()
    file_path = (static_root / (path_name or "")).resolve()

    # The path comes straight from the request: "../" segments or an absolute
    # path would otherwise escape the static directory. Only paths strictly
    # inside the static root are served.
    if static_root not in file_path.parents:
        raise HTTPException(status_code=450, detail="Incorrect API call")

    if file_path.is_file():
        return FileResponse(file_path)

    # with_suffix only changes the final name, so this stays inside the root.
    html_file_path = file_path.with_suffix(".html")
    if html_file_path.is_file():
        return FileResponse(html_file_path)

    raise HTTPException(status_code=450, detail="Incorrect API call")
|
||||
|
||||
|
||||
# ------------ Virtualization ------------ #
|
||||
|
||||
async def virtualize_bot(room_url: str, token: str):
    """
    This is an example of how to virtualize the bot using Fly.io
    You can adapt this method to use whichever cloud provider you prefer.

    The image of the app's first listed machine is reused for a new machine
    that runs the bot, and the call waits until that machine reports the
    "started" state. FLY_API_HOST, FLY_APP_NAME and FLY_API_KEY are read
    from the environment.

    Args:
        room_url (str): URL of the room the bot should join.
        token (str): Meeting token the bot joins with.

    Raises:
        Exception: If the machine list cannot be fetched or is empty, if the
            new machine cannot be created, or if it does not reach the
            started state.
    """
    FLY_API_HOST = os.getenv("FLY_API_HOST", "https://api.machines.dev/v1")
    FLY_APP_NAME = os.getenv("FLY_APP_NAME", "storytelling-chatbot")
    FLY_API_KEY = os.getenv("FLY_API_KEY", "")
    FLY_HEADERS = {
        'Authorization': f"Bearer {FLY_API_KEY}",
        'Content-Type': 'application/json'
    }
    machines_url = f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines"

    async with aiohttp.ClientSession() as session:
        # Use the same image as the bot runner
        async with session.get(machines_url, headers=FLY_HEADERS) as r:
            if r.status != 200:
                text = await r.text()
                raise Exception(f"Unable to get machine info from Fly: {text}")

            data = await r.json()
            # An app with no machines would otherwise fail with a bare IndexError.
            if not data:
                raise Exception(
                    f"Unable to get machine info from Fly: no machines found for app {FLY_APP_NAME}")
            image = data[0]['config']['image']

        # Machine configuration. The command is built as an argument list
        # rather than formatted into a string and split on whitespace.
        cmd = ["python3", "src/bot.py", "-u", room_url, "-t", token]
        worker_props = {
            "config": {
                "image": image,
                "auto_destroy": True,
                "init": {
                    "cmd": cmd
                },
                "restart": {
                    "policy": "no"
                },
                "guest": {
                    "cpu_kind": "shared",
                    "cpus": 1,
                    "memory_mb": 512
                }
            },
        }

        # Spawn a new machine instance
        async with session.post(machines_url, headers=FLY_HEADERS, json=worker_props) as r:
            if r.status != 200:
                text = await r.text()
                raise Exception(f"Problem starting a bot worker: {text}")

            data = await r.json()
            # Wait for the machine to enter the started state
            vm_id = data['id']

        async with session.get(f"{machines_url}/{vm_id}/wait?state=started", headers=FLY_HEADERS) as r:
            if r.status != 200:
                text = await r.text()
                raise Exception(f"Bot was unable to enter started state: {text}")

    print(f"Machine joined room: {room_url}")
|
||||
|
||||
|
||||
# ------------ Main ------------ #
|
||||
|
||||
if __name__ == "__main__":
    # Entry point: validate configuration, parse CLI options, start uvicorn.

    # Check environment variables. Test for a non-empty value rather than
    # mere presence: a .env copied from env.example defines every key with an
    # empty value, which would pass a presence check.
    required_env_vars = ['OPENAI_API_KEY', 'DAILY_API_KEY',
                         'FAL_KEY', 'ELEVENLABS_VOICE_ID', 'ELEVENLABS_API_KEY']
    for env_var in required_env_vars:
        if not os.getenv(env_var):
            raise Exception(f"Missing environment variable: {env_var}.")

    import uvicorn

    # HOST and FAST_API_PORT provide the defaults; CLI flags override them.
    default_host = os.getenv("HOST", "0.0.0.0")
    default_port = int(os.getenv("FAST_API_PORT", "7860"))

    parser = argparse.ArgumentParser(
        description="Daily Storyteller FastAPI server")
    parser.add_argument("--host", type=str,
                        default=default_host, help="Host address")
    parser.add_argument("--port", type=int,
                        default=default_port, help="Port number")
    parser.add_argument("--reload", action="store_true",
                        help="Reload code on change")

    config = parser.parse_args()

    uvicorn.run(
        "bot_runner:app",
        host=config.host,
        port=config.port,
        reload=config.reload
    )
|
||||
@@ -1,175 +0,0 @@
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
import atexit
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.responses import FileResponse, JSONResponse
|
||||
|
||||
from utils.daily_helpers import create_room as _create_room, get_token, get_name_from_url
|
||||
|
||||
MAX_BOTS_PER_ROOM = 1
|
||||
|
||||
# Bot sub-process dict for status reporting and concurrency control
|
||||
bot_procs = {}
|
||||
|
||||
|
||||
def cleanup():
    """Terminate every tracked bot subprocess and wait for it to exit."""
    # Clean up function, just to be extra safe.
    # bot_procs maps pid -> (process, room_url), so unpack the tuple; calling
    # terminate() on the tuple itself raises AttributeError.
    for proc, _room_url in bot_procs.values():
        proc.terminate()
        proc.wait()
|
||||
|
||||
|
||||
atexit.register(cleanup)
|
||||
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Mount the static directory
|
||||
STATIC_DIR = "frontend/out"
|
||||
|
||||
app.mount("/static", StaticFiles(directory=STATIC_DIR, html=True), name="static")
|
||||
|
||||
|
||||
@app.post("/create")
async def create_room(request: Request) -> JSONResponse:
    """
    Return a room and a token for it.

    Uses the room given as 'room_url' in the JSON body when present,
    otherwise creates a new room.

    Returns:
        JSONResponse: The room URL, room name and token.
    """
    payload = await request.json()
    requested_url = payload.get('room_url')

    if requested_url is None:
        room_url, room_name = _create_room()
    else:
        room_url = requested_url
        room_name = get_name_from_url(room_url)

    token = get_token(room_url)
    body = {"room_url": room_url, "room_name": room_name, "token": token}
    return JSONResponse(body)
|
||||
|
||||
|
||||
@app.post("/start")
async def start_agent(request: Request) -> JSONResponse:
    """
    Start a bot subprocess in the room given in the request body.

    Args:
        request (Request): Request whose JSON body carries 'room_url', or a
            'test' key for webhook verification.

    Returns:
        JSONResponse: {"bot_id": <pid>, "room_url": ...}, or {"test": True}
        for a webhook verification request.

    Raises:
        HTTPException: 500 when 'room_url' is missing, the room already has
            MAX_BOTS_PER_ROOM running bots, no token is returned, or the
            subprocess cannot be started.
    """
    data = await request.json()

    # Is this a webhook creation request?
    if "test" in data:
        return JSONResponse({"test": True})

    # Ensure the room property is present
    room_url = data.get('room_url')
    if not room_url:
        raise HTTPException(
            status_code=500,
            detail="Missing 'room' property in request data. Cannot start agent without a target room!")

    # Check if there is already an existing process running in this room
    num_bots_in_room = sum(
        1 for proc in bot_procs.values() if proc[1] == room_url and proc[0].poll() is None)
    if num_bots_in_room >= MAX_BOTS_PER_ROOM:
        raise HTTPException(
            status_code=500, detail=f"Max bot limited reach for room: {room_url}")

    # Get the token for the room
    token = get_token(room_url)

    if not token:
        raise HTTPException(
            status_code=500, detail=f"Failed to get token for room: {room_url}")

    # Spawn a new agent, and join the user session
    # Note: this is mostly for demonstration purposes (refer to 'deployment' in README)
    try:
        # room_url comes from the request body. Pass an argument vector with
        # no shell so its contents can never be executed as shell syntax.
        proc = subprocess.Popen(
            ["python3", "-m", "bot", "-u", room_url, "-t", token],
            bufsize=1,
            cwd=os.path.dirname(os.path.abspath(__file__))
        )
        bot_procs[proc.pid] = (proc, room_url)
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Failed to start subprocess: {e}")

    return JSONResponse({"bot_id": proc.pid, "room_url": room_url})
|
||||
|
||||
|
||||
@app.get("/status/{pid}")
def get_status(pid: int):
    """
    Report whether the bot subprocess with the given pid is still running.

    Returns:
        JSONResponse: {"bot_id": pid, "status": "running" | "finished"}.

    Raises:
        HTTPException: 404 when no bot is tracked under this pid.
    """
    entry = bot_procs.get(pid)

    # Unknown pid: nothing to report on
    if not entry:
        raise HTTPException(
            status_code=404, detail=f"Bot with process id: {pid} not found")

    # poll() returns None while the subprocess is still alive
    process = entry[0]
    status = "running" if process.poll() is None else "finished"

    return JSONResponse({"bot_id": pid, "status": status})
|
||||
|
||||
|
||||
@app.get("/{path_name:path}", response_class=FileResponse)
async def catch_all(path_name: Optional[str] = ""):
    """
    Serve files of the built frontend for any path no other route handles.

    An empty path serves index.html. Otherwise the path is looked up under
    STATIC_DIR, first as given and then with its suffix replaced by ".html".

    Args:
        path_name (Optional[str]): Request path relative to the site root.

    Returns:
        FileResponse: The matching file.

    Raises:
        HTTPException: 450 when no file matches or when the path resolves
            outside STATIC_DIR.
    """
    if path_name == "":
        return FileResponse(f"{STATIC_DIR}/index.html")

    static_root = Path(STATIC_DIR).resolve()
    file_path = (static_root / (path_name or "")).resolve()

    # The path comes straight from the request: "../" segments or an absolute
    # path would otherwise escape the static directory. Only paths strictly
    # inside the static root are served.
    if static_root not in file_path.parents:
        raise HTTPException(status_code=450, detail="Incorrect API call")

    if file_path.is_file():
        return FileResponse(file_path)

    # with_suffix only changes the final name, so this stays inside the root.
    html_file_path = file_path.with_suffix(".html")
    if html_file_path.is_file():
        return FileResponse(html_file_path)

    raise HTTPException(status_code=450, detail="Incorrect API call")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Entry point: validate configuration, parse CLI options, start uvicorn.

    # Check environment variables. A variable that is defined but empty is as
    # unusable as a missing one, so test for a non-empty value.
    required_env_vars = ['OPENAI_API_KEY', 'DAILY_API_KEY',
                         'FAL_KEY', 'ELEVENLABS_VOICE_ID', 'ELEVENLABS_API_KEY']
    for env_var in required_env_vars:
        if not os.getenv(env_var):
            raise Exception(f"Missing environment variable: {env_var}.")

    import uvicorn

    # HOST and FAST_API_PORT provide the defaults; CLI flags override them.
    default_host = os.getenv("HOST", "0.0.0.0")
    default_port = int(os.getenv("FAST_API_PORT", "7860"))

    parser = argparse.ArgumentParser(
        description="Daily Storyteller FastAPI server")
    parser.add_argument("--host", type=str,
                        default=default_host, help="Host address")
    parser.add_argument("--port", type=int,
                        default=default_port, help="Port number")
    parser.add_argument("--reload", action="store_true",
                        help="Reload code on change")

    config = parser.parse_args()

    uvicorn.run(
        "server:app",
        host=config.host,
        port=config.port,
        reload=config.reload
    )
|
||||
@@ -1,109 +0,0 @@
|
||||
|
||||
import urllib.parse
|
||||
import os
|
||||
import time
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
|
||||
daily_api_path = os.getenv("DAILY_API_URL") or "api.daily.co/v1"
|
||||
daily_api_key = os.getenv("DAILY_API_KEY")
|
||||
|
||||
|
||||
def create_room() -> tuple[str, str]:
    """
    Helper function to create a Daily room.
    # See: https://docs.daily.co/reference/rest-api/rooms

    The room is created with an expiry one hour from now, chat and emoji
    reactions enabled, and the prejoin UI disabled.

    Returns:
        tuple: A tuple containing the room URL and room name.

    Raises:
        Exception: If the request to create the room fails or if the response does not contain the room URL or room name.
        requests.exceptions.RequestException: If the HTTP request cannot be completed or times out.
    """
    room_props = {
        "exp": time.time() + 60 * 60,  # 1 hour
        "enable_chat": True,
        "enable_emoji_reactions": True,
        "eject_at_room_exp": True,
        "enable_prejoin_ui": False,  # Important for the bot to be able to join headlessly
    }
    res = requests.post(
        f"https://{daily_api_path}/rooms",
        headers={"Authorization": f"Bearer {daily_api_key}"},
        json={"properties": room_props},
        # requests never times out by default; bound the wait so a stalled
        # connection cannot hang the caller forever.
        timeout=30,
    )
    if res.status_code != 200:
        raise Exception(f"Unable to create room: {res.text}")

    data = res.json()
    room_url = data.get("url")
    room_name = data.get("name")
    if room_url is None or room_name is None:
        raise Exception("Missing room URL or room name in response")

    return room_url, room_name
|
||||
|
||||
|
||||
def get_name_from_url(room_url: str) -> str:
    """
    Return the room name encoded in a room URL.

    The name is the URL's path component with its first character (the
    leading slash) removed; query string and fragment are not included.

    Args:
        room_url (str): The URL of the room.

    Returns:
        str: The extracted name from the room URL.
    """
    url_path = urllib.parse.urlparse(room_url).path
    return url_path[1:]
|
||||
|
||||
|
||||
def get_token(room_url: str) -> str:
    """
    Retrieves a meeting token for the specified Daily room URL.
    # See: https://docs.daily.co/reference/rest-api/meeting-tokens

    The token is an owner token that expires one hour from now.

    Args:
        room_url (str): The URL of the Daily room.

    Returns:
        str: The meeting token.

    Raises:
        Exception: If no room URL is specified or if no Daily API key is specified.
        Exception: If there is an error creating the meeting token.
        requests.exceptions.RequestException: If the HTTP request cannot be completed or times out.
    """
    if not room_url:
        raise Exception(
            "No Daily room specified. You must specify a Daily room in order a token to be generated.")

    if not daily_api_key:
        raise Exception(
            "No Daily API key specified. set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")

    expiration: float = time.time() + 60 * 60
    room_name = get_name_from_url(room_url)

    res: requests.Response = requests.post(
        f"https://{daily_api_path}/meeting-tokens",
        headers={"Authorization": f"Bearer {daily_api_key}"},
        json={
            "properties": {
                "room_name": room_name,
                "is_owner": True,  # Owner tokens required for transcription
                "exp": expiration,
            }
        },
        # requests never times out by default; bound the wait so a stalled
        # connection cannot hang the caller forever.
        timeout=30,
    )

    if res.status_code != 200:
        raise Exception(
            f"Failed to create meeting token: {res.status_code} {res.text}")

    token: str = res.json()["token"]

    return token
|
||||
13
examples/studypal/README.md
Normal file
13
examples/studypal/README.md
Normal file
@@ -0,0 +1,13 @@
|
||||
# studypal
|
||||
|
||||
### Have a conversation about any article on the web
|
||||
|
||||
studypal is a fast conversational AI built using [Daily](https://www.daily.co/) for real-time media transport and [Cartesia](https://cartesia.ai) for text-to-speech. Everything is orchestrated together (VAD -> STT -> LLM -> TTS) using [Pipecat](https://www.pipecat.ai/).
|
||||
|
||||
## Setup
|
||||
|
||||
1. Clone the repository
|
||||
2. Copy `env.example` to a `.env` file and add API keys
|
||||
3. Install the required packages: `pip install -r requirements.txt`
|
||||
4. Run `python3 studypal.py` from your command line.
|
||||
5. While the app is running, go to the `https://<yourdomain>.daily.co/<room_url>` set in `DAILY_SAMPLE_ROOM_URL` and talk to studypal!
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user