Compare commits

...

214 Commits

Author SHA1 Message Date
Brian Hill
23be3d8b57 [ENG-7781] Enable docker build without git 2024-08-12 08:47:36 -04:00
Aleix Conchillo Flaqué
2b76c3c15a update macos-py3.10-requirements 2024-08-09 17:18:30 -07:00
Aleix Conchillo Flaqué
cedd7dde18 update linux-py3.10-requirements.txt 2024-08-09 17:14:46 -07:00
Lewis Wolfgang
d088608d8e Merge pull request #340 from pipecat-ai/lewis/silero-vad-via-pip
Install Silero VAD via pip
2024-08-09 13:27:29 -04:00
Aleix Conchillo Flaqué
06ee29bb8b Merge pull request #359 from pipecat-ai/aleix/twilio-elevenlabs-sample-rates
twilio and elevenlabs sample rates
2024-08-09 09:38:35 -07:00
Aleix Conchillo Flaqué
d255e954d6 services(elevenlabs): allow specifying output_format 2024-08-09 09:38:20 -07:00
Aleix Conchillo Flaqué
6a7ab6b8ac serializers(twilio): allow specifying input and output sample rates 2024-08-09 09:37:51 -07:00
Aleix Conchillo Flaqué
45b18cc0b1 Merge pull request #358 from pipecat-ai/aleix/daily-create-room-exp-fixes
transports(daily): fixed create_room expirations
2024-08-09 09:37:01 -07:00
Aleix Conchillo Flaqué
0479431f0a Merge pull request #357 from pipecat-ai/aleix/daily-on-participant-updated
transports(daily): added on_participant_updated event
2024-08-09 09:36:46 -07:00
Aleix Conchillo Flaqué
ec58dbd791 transports(daily): added on_participant_updated event
Fixes #353
2024-08-09 09:36:24 -07:00
Aleix Conchillo Flaqué
91de68aab3 Merge pull request #355 from pipecat-ai/aleix/usage-metrics-update
processors(base): add start_llm_usage_metrics and start_tts_usage_met…
2024-08-09 09:35:36 -07:00
Aleix Conchillo Flaqué
85efc30145 Merge pull request #356 from pipecat-ai/aleix/eleven_turbo_v2_5
services(elevenlabs): update default model to eleven_turbo_v2_5
2024-08-09 09:34:47 -07:00
Aleix Conchillo Flaqué
0032594f21 transports(daily): fixed create_room expirations
Fixes #348
2024-08-08 22:04:22 -07:00
Aleix Conchillo Flaqué
829fdc5679 services(elevenlabs): update default model to eleven_turbo_v2_5
Fixes #349
2024-08-08 21:38:18 -07:00
Aleix Conchillo Flaqué
22e176e329 processors(base): add start_llm_usage_metrics and start_tts_usage_metrics 2024-08-08 16:46:56 -07:00
Lewis Wolfgang
826a70a137 Merge pull request #354 from pipecat-ai/lewis/delete_room_by_name
Add delete_room_by_name to DailyRESTHelper
2024-08-08 17:09:21 -04:00
Lewis Wolfgang
dd0ea674af Treat 404 (room not found) as a success for deletion 2024-08-08 16:57:58 -04:00
Lewis Wolfgang
a4761b8921 Add delete_room_by_name to DailyRESTHelper 2024-08-08 16:31:01 -04:00
chadbailey59
3958bb7903 Additional LLM and TTS metrics (#343)
* added llm and tts usage metrics

* Metrics debug logging

* cleanup
2024-08-07 08:55:51 -05:00
Aleix Conchillo Flaqué
83a037a7ce Merge pull request #345 from pipecat-ai/aleix/base-output-render-time-fixes
transports(base_output): improve render sleep computation
2024-08-06 17:30:47 -07:00
Aleix Conchillo Flaqué
a3eb8337a6 Merge pull request #342 from pipecat-ai/aleix/base-output-transport-push-audio
transport(base_output): push audio downstream
2024-08-06 17:30:32 -07:00
Aleix Conchillo Flaqué
541072f8e0 transports(base_output): improve render sleep computation 2024-08-06 17:20:41 -07:00
Aleix Conchillo Flaqué
881248cbd6 transport(base_output): push audio downstream 2024-08-05 14:00:09 -07:00
Aleix Conchillo Flaqué
d4979f5e64 Merge pull request #337 from pipecat-ai/aleix/audio-video-sync-and-gstreamer
audio/video sync and gstreamer
2024-08-05 09:28:11 -07:00
Aleix Conchillo Flaqué
4133cd03bb processors(gstreamer): add clock_sync property 2024-08-05 09:23:25 -07:00
Lewis Wolfgang
9f07c3ca27 Fly.io example: remove step to cache silero models.
No longer necessary.
2024-08-05 10:12:35 -04:00
Lewis Wolfgang
b20bacb9ed Remove no longer needed code 2024-08-05 10:10:39 -04:00
Lewis Wolfgang
97cfbfee1d Install silero via pip 2024-08-05 10:01:27 -04:00
Aleix Conchillo Flaqué
fa7c941792 examples(gstreamer): add new GStreamer examples 2024-08-04 12:29:36 -07:00
Aleix Conchillo Flaqué
4738879f32 processors(gstreamer): add new GStreamerPipelineSource 2024-08-04 12:29:34 -07:00
Aleix Conchillo Flaqué
d5d88f756a transport(output): improve audio and image handling for video use cases 2024-08-04 12:29:08 -07:00
Aleix Conchillo Flaqué
65b136bf15 Merge pull request #334 from pipecat-ai/aleix/cleanup-examples-remove-requests
cleanup examples and remove requests
2024-08-01 22:05:01 -07:00
Aleix Conchillo Flaqué
bee0b238e4 examples(storytelling-chatbot): include package-lock.json 2024-08-01 18:23:30 -07:00
Aleix Conchillo Flaqué
c891168ffb services: revert optional aiohttp.ClientSession 2024-08-01 18:22:56 -07:00
Aleix Conchillo Flaqué
6376c2f6aa transport(websocket): fix cancel 2024-08-01 18:09:16 -07:00
Aleix Conchillo Flaqué
4d9b7cdd61 DailyRESTHelper now receives an aiohttp client session 2024-08-01 18:08:57 -07:00
Aleix Conchillo Flaqué
8263d1dd6f update CHANGELOG with latest changes 2024-07-31 23:44:07 -07:00
Aleix Conchillo Flaqué
faf41c0b36 services: ignore yielded None values 2024-07-31 23:41:03 -07:00
Aleix Conchillo Flaqué
27a09c0b2c cleanup examples and remove requests library 2024-07-31 23:39:51 -07:00
Aleix Conchillo Flaqué
3db7f6a284 Merge pull request #333 from pipecat-ai/aleix/allow-internal-http-sessions-rebased
services: allow internal http sessions if none is given
2024-07-31 21:57:00 -07:00
Aleix Conchillo Flaqué
3bfeb5b5ef services: allow internal http sessions if none is given 2024-07-31 21:56:19 -07:00
Aleix Conchillo Flaqué
62a7a555b5 Merge pull request #330 from pipecat-ai/aleix/stop-and-cancel-are-different
EndFrame tries to end gracefully CancelFrame cancels tasks
2024-07-31 15:51:29 -07:00
Aleix Conchillo Flaqué
d60e99a043 examples(06a-image-sync): make sure frames go downstream 2024-07-30 11:41:58 -07:00
Aleix Conchillo Flaqué
77723b34c7 EndFrame tries to end gracefully CancelFrame cancels tasks 2024-07-30 11:41:19 -07:00
Aleix Conchillo Flaqué
c466d34a06 Merge pull request #328 from pipecat-ai/aleix/rtvi-towards-custom-pipelines
processors(rtvi): refactor to allow future custom pipelines
2024-07-29 15:07:57 -07:00
Aleix Conchillo Flaqué
f816897833 Merge pull request #327 from pipecat-ai/aleix/bot-start-stop-speaking-frames
bot start stop speaking frames
2024-07-27 17:21:23 -07:00
Aleix Conchillo Flaqué
c1e8a5e522 processors(rtvi): refactor to allow future custom pipelines 2024-07-26 10:26:36 -07:00
Aleix Conchillo Flaqué
76aca32f2e transport(output): emit new bot start|stop speaking frames 2024-07-25 14:50:33 -07:00
Aleix Conchillo Flaqué
7e31b2a795 processors(user_idle): use user speaking instead of interruption frames 2024-07-25 14:47:56 -07:00
Aleix Conchillo Flaqué
028e38a86b Merge pull request #326 from pipecat-ai/aleix/rtvi-bot-ready-fixes
rtvi: send bot-ready when pipeline is ready and first participant joins
2024-07-25 11:39:14 -07:00
Aleix Conchillo Flaqué
8cf7649855 processors(rtvi): send bot-ready when pipeline AND first participant joins 2024-07-25 11:25:51 -07:00
Aleix Conchillo Flaqué
64f5119b08 transports(base): allow registering event handlers without decorators 2024-07-25 11:24:24 -07:00
Aleix Conchillo Flaqué
4d606aefb3 update CHANGELOG 2024-07-25 09:57:01 -07:00
Ankur Duggal
4bafdaa04d Deepgram Adjustments (#313) 2024-07-25 09:51:51 -07:00
Aleix Conchillo Flaqué
5afe1abf82 Merge pull request #323 from pipecat-ai/aleix/base-input-handle-incoming-interruptions
transports(inputs): handle start/stop interruption frames
2024-07-24 15:16:18 -07:00
Aleix Conchillo Flaqué
f066d50b98 transports(inputs): handle start/stop interruption frames 2024-07-24 15:15:09 -07:00
Aleix Conchillo Flaqué
91103e21cc github(publish_test): download tags and depth to 100 2024-07-24 14:49:09 -07:00
Aleix Conchillo Flaqué
f44dabcd65 Merge pull request #322 from pipecat-ai/aleix/base-input-transport-system-frames-fix
transports(inputs): don't queue incoming system frames
2024-07-24 14:44:18 -07:00
Aleix Conchillo Flaqué
0fd2fca231 frames: StartFrame is now a control frame 2024-07-24 14:42:59 -07:00
Aleix Conchillo Flaqué
5bb64098e7 transports(inputs): don't queue incoming system frames 2024-07-24 14:35:00 -07:00
Aleix Conchillo Flaqué
3fc85e75e0 Merge pull request #320 from pipecat-ai/aleix/req-updates-072324
update project requirements and dependencies
2024-07-23 17:45:18 -07:00
Aleix Conchillo Flaqué
3f61ea16b7 update project requirements and dependencies 2024-07-23 17:35:47 -07:00
Aleix Conchillo Flaqué
4b393092b5 Merge pull request #319 from pipecat-ai/aleix/daily-completion-callbacks-0.0.39-fix
transports(daily): fix completion callbacks handling
2024-07-23 15:27:26 -07:00
Aleix Conchillo Flaqué
b583f5162b transports(daily): fix completion callbacks handling 2024-07-23 15:25:59 -07:00
Aleix Conchillo Flaqué
060a22f395 github: only run publish_test manually
We need to run this manually to avoid test.pypi.org project size limits.
2024-07-23 14:19:24 -07:00
Aleix Conchillo Flaqué
d3e85355f1 Merge pull request #318 from pipecat-ai/aleix/prepare-0.0.38
update CHANGELOG for 0.0.38
2024-07-23 14:12:01 -07:00
Aleix Conchillo Flaqué
83e730b768 update CHANGELOG for 0.0.38 2024-07-23 14:10:10 -07:00
Aleix Conchillo Flaqué
5fcc96446c Merge pull request #317 from pipecat-ai/aleix/silero-repo-params
vad(silero): expose cache and repo parameters
2024-07-23 12:13:20 -07:00
Aleix Conchillo Flaqué
ad88925154 vad(silero): expose cache and repo parameters 2024-07-23 12:12:28 -07:00
Aleix Conchillo Flaqué
0a6ddbf15c Merge pull request #316 from pipecat-ai/aleix/metrics-improvements
metrics improvements
2024-07-23 11:23:57 -07:00
Aleix Conchillo Flaqué
08e0722d97 fix initial metrics format 2024-07-23 11:23:03 -07:00
Aleix Conchillo Flaqué
05d4fba551 processors(rtvi): send initial empty metrics 2024-07-23 11:22:41 -07:00
Aleix Conchillo Flaqué
f41c2b3c9f transports(daily): don't send empty metrics 2024-07-23 11:22:41 -07:00
Aleix Conchillo Flaqué
69f64899fe pipeline: add send_initial_empty_metrics flag 2024-07-23 11:22:41 -07:00
Aleix Conchillo Flaqué
33f0865430 Merge pull request #315 from pipecat-ai/aleix/stop-transcription-error
transports(daily): wait until start|stop_transcription are finished
2024-07-23 11:18:59 -07:00
Aleix Conchillo Flaqué
ad5b9202ab transports(daily): wait until start|stop_transcription are finished
Fixes #305
2024-07-22 22:59:30 -07:00
Aleix Conchillo Flaqué
1676693091 Merge pull request #314 from pipecat-ai/aleix/transcription-timestamps
services: transcription timestamp should use ISO8601 format
2024-07-22 22:43:01 -07:00
Aleix Conchillo Flaqué
0852b50b8f services: transcription timestamp should use ISO8601 format 2024-07-22 22:40:28 -07:00
Aleix Conchillo Flaqué
eb998aa502 Merge pull request #312 from pipecat-ai/aleix/rtvi-support
RTVI support
2024-07-22 16:58:40 -07:00
Aleix Conchillo Flaqué
6dab0e9de7 update CHANGELOG for 0.0.37 2024-07-22 16:00:30 -07:00
Aleix Conchillo Flaqué
95ff1d141c update CHANGELOG with RTVIProcessor 2024-07-22 16:00:26 -07:00
Aleix Conchillo Flaqué
87bc8a9da6 examples: remove RTVI since there are full demos elsewhere 2024-07-22 15:53:39 -07:00
Aleix Conchillo Flaqué
087fe9a537 services(cartesia): fix TTFB 2024-07-22 15:30:16 -07:00
Aleix Conchillo Flaqué
c1170260b5 processors(rtvi): use generic LLM and TTS names 2024-07-22 15:27:33 -07:00
Aleix Conchillo Flaqué
65cdf50774 processors(rtvi): fix task cleanup 2024-07-22 15:01:45 -07:00
Aleix Conchillo Flaqué
9233bb490c processors(rtvi): add support for "tts-text" messages 2024-07-22 11:40:17 -07:00
Aleix Conchillo Flaqué
43932220f7 processors(rtvi): use only user-transcription 2024-07-22 09:40:16 -07:00
Aleix Conchillo Flaqué
cea4d1894e processors(rtvi): change voice before LLM updates 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
80baa0358d processors(rtvi): lable is now rtvi 2024-07-22 09:32:18 -07:00
Chad Bailey
5d73db53a0 initial pseudo function calling 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
302ea90dce processors(rtvi): messages now require an id 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
37b04ed283 processors(rtvi): use send a type=response as command responses 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
be6995cfdf processors(rtvi): renamed realtime-ai to rtvi 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
dfbc11300c processors(realtime-ai): use label instead of tag 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
82d539d174 processors(realtime-ai): add support for interrupting the bot 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
6e00f31014 updated CHANGELOG with new frames and realtime-ai changes 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
a46ac3cc92 examples: moved 18-realtime-ai.py to examples/realtime-ai 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
6fbf98d8e2 processors(realtime-ai): llm-context now uses a data field 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
f094c42728 processors(realtime-ai): add transcription messages 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
13827e1282 processors(realtime-ai): send a successful response for every command 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
32170b47d9 processors(realtime-ai): add user-[start|stopped]-speaking messages 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
09c05354c2 processors(realtime-ai): fix voice initialization 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
b0b1475563 processors(realtime-ai): add support making TTS to speak 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
b85dd7283a processors(realtime-ai): add support for appending to the LLM context 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
846ae765e5 services(TTSService): fix sentence cleanup 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
4c629e538e processors(realtime-ai): add assistant before output transport
Cartesia can do word-to-word output instead of full sentences. This means that
for properly adding things into the context we need to add it before the
transport, otherwise some words might be lost.
2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
f6e22bb3b9 processors(realtime-ai): add silero vad to the transport 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
46a048d7f6 processors(realtime-ai): allow default setup to be None 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
bd9f4eea06 processors(realtime-ai): provide default values 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
0a672e61e2 processors(realtime-ai): update it to use groq by default 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
29a8530221 processors(realtime-ai): add support for updating config (model, voice...) 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
3e738642a7 processors(realtime-ai): add support for getting/updating LLM context 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
f551f55f03 examples: add new foundational/18-realtime-ai.py 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
9f012c8002 processors: add new RealtimeAIProcessor 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
0a69a9e5ef transport(daily): also accept TransportMessageFrame 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
194790183a processor: add support for setting a processor parent 2024-07-22 09:32:18 -07:00
Aleix Conchillo Flaqué
2227721173 update CHANGELOG with StatelessTextTransformer fix (update) 2024-07-22 09:30:45 -07:00
Aleix Conchillo Flaqué
77a53da5f5 update CHANGELOG with StatelessTextTransformer fix 2024-07-22 09:28:38 -07:00
Aleix Conchillo Flaqué
ab63ff275d Merge pull request #310 from weedge/fix/StatelessTextTransformer
fix: push_frame use TextFrame
2024-07-22 09:25:27 -07:00
weedge
e5363f65f0 fix: push_frame use TextFrame
Signed-off-by: weedge <weege007@gmail.com>
2024-07-22 17:29:06 +08:00
Lewis Wolfgang
ffc157de65 Merge pull request #307 from pipecat-ai/lewis/increase_openai_keepalive_expiry
Allow openai http connections to remain open in the pool indefinitely.
2024-07-19 07:09:17 -04:00
Lewis Wolfgang
f9fdadb4c0 Allow openai http connections to remain open in the pool indefinitely.
Rather than expiring in 5 seconds.
2024-07-18 11:18:21 -04:00
Aleix Conchillo Flaqué
4efccb79f2 Merge pull request #306 from pipecat-ai/aleix/remove-llm-response-start-end-frame
remove LLMResponseStartFrame and LLMResponseEndFrame
2024-07-17 21:51:02 -07:00
Aleix Conchillo Flaqué
337968199a update CHANGELOG with CartesiaTTSService and TTSService updates 2024-07-17 20:58:10 -07:00
Aleix Conchillo Flaqué
37027f68cb remove LLMResponseStartFrame and LLMResponseEndFrame
This was added in the past to properly handle interruptions for the
LLMAssistantContextAggregator. But this is not necessary anymore since we can
handle interruptions by just processing the StartInterruptionFrame, so there's
no need for these extra frames.
2024-07-17 20:53:35 -07:00
Kwindla Hultman Kramer
d1b62c5495 Merge pull request #304 from pipecat-ai/khk/cartesia-continue
Cartesia streaming (WebSocket) and word-level timestamps support
2024-07-17 20:29:15 -07:00
Kwindla Hultman Kramer
355fe01cb7 fixed forgotten renames 2024-07-17 20:28:27 -07:00
Kwindla Hultman Kramer
9d050a16c7 committing an uncommitted file 2024-07-17 20:23:41 -07:00
Kwindla Hultman Kramer
fa53c67606 comments re fixes 2024-07-17 18:30:45 -07:00
Kwindla Hultman Kramer
5006376fe6 undo changes to 02-llm-say-one-thing.py 2024-07-17 15:18:47 -07:00
Kwindla Hultman Kramer
2204b8e205 cartesia streaming and context management via word-level timestamps 2024-07-17 15:17:00 -07:00
Kwindla Hultman Kramer
270007b17c wip - using cartesia word timestamps for context management 2024-07-17 14:13:52 -07:00
Kwindla Hultman Kramer
568eb2ef4c cartesia websockets and streaming 2024-07-17 14:13:52 -07:00
Kwindla Hultman Kramer
73ca9184a8 wip cartesia continuation (not working yet) 2024-07-17 14:13:52 -07:00
Aleix Conchillo Flaqué
5e8e11e16e pyproject: require python >= 3.10 2024-07-17 09:52:42 -07:00
Aleix Conchillo Flaqué
029bbc16f2 Merge pull request #286 from TomTom101/feat/regex_endofsentence
fix: No more falsely detect a sentence end on "U.S.A", "3:00 a.m."
2024-07-17 09:49:21 -07:00
Aleix Conchillo Flaqué
9e3d87e4f6 Merge pull request #291 from adidoit/main
Fix error with readme example - SyntaxError: positional argument follows keyword argument
2024-07-15 13:10:17 -04:00
Aleix Conchillo Flaqué
f1410a1127 Merge pull request #297 from wtlow003/main
fix: minor typo
2024-07-15 13:08:23 -04:00
wtlow003
2b980d16c3 fix: minor typo 2024-07-12 18:27:57 +08:00
Adi Pradhan
b2b97aafb8 fix error with readme example - SyntaxError: positional argument follows keyword argument 2024-07-10 09:50:20 -04:00
TomTom101
da2082b025 chore: Combined combinable lookaheads 2024-07-06 11:11:40 +02:00
TomTom101
327ea9d547 chore: Make it a const 2024-07-06 11:08:51 +02:00
TomTom101
b23db4a202 chore: commented regex 2024-07-06 11:06:52 +02:00
TomTom101
d1a36004ab fix: No more falsely detect a sentence end on "U.S.A", "3:00 a.m." and more 2024-07-06 11:01:32 +02:00
Jon Taylor
6071920c45 Merge pull request #284 from pipecat-ai/jpt/storybot-load-balance
Update storybot demo
2024-07-03 19:48:32 +01:00
Jon Taylor
5f539e1fba fixed teardown 2024-07-03 17:02:54 +01:00
Jon Taylor
8e1539c360 virtualized deployment and added room-based balancing 2024-07-03 16:48:14 +01:00
Aleix Conchillo Flaqué
065cfb2aca Merge pull request #280 from pipecat-ai/aleix/library-updates-070224
library updates 070224 and pipecat 0.0.36
2024-07-02 10:14:03 -07:00
Aleix Conchillo Flaqué
3147534e86 update CHANGELOG for 0.0.36 2024-07-02 10:13:26 -07:00
Aleix Conchillo Flaqué
be5603bf16 examples: fix 06a-image-sync.py 2024-07-02 10:11:50 -07:00
Aleix Conchillo Flaqué
b9b0bcdcbd services(azure): close the audio stream on exit 2024-07-02 10:11:35 -07:00
Aleix Conchillo Flaqué
5bcece56f3 services(cartesia): make sure we close the client on exit 2024-07-02 10:11:16 -07:00
Aleix Conchillo Flaqué
d67faef88c pyproject: multiple library updates 2024-07-02 09:05:37 -07:00
Aleix Conchillo Flaqué
8f6db5e905 Merge pull request #279 from pipecat-ai/aleix/gladia-stt-support
add Gladia STT support
2024-07-02 08:07:35 -07:00
Aleix Conchillo Flaqué
82e93a0560 use exclude_none=True when dumping BaseModels 2024-07-02 08:03:31 -07:00
Aleix Conchillo Flaqué
a9a82c083b services: add GladiaSTTService support 2024-07-02 08:03:29 -07:00
Aleix Conchillo Flaqué
974d9c33ed Merge pull request #278 from pipecat-ai/aleix/detect-user-idle
add support for detecting user idle
2024-07-02 08:01:27 -07:00
Jon Taylor
c1957ab694 Merge pull request #274 from pipecat-ai/jpt/deployment-examples
Example deployment pattern for fly.io
2024-07-02 10:17:13 +01:00
Jon Taylor
b20a10a4bc fixed double fly 2024-07-02 10:17:01 +01:00
Aleix Conchillo Flaqué
be14ce465d transports(daily): make sure we don't send data if client is closed 2024-07-01 18:26:13 -07:00
Aleix Conchillo Flaqué
d1ca0c5614 examples: added new 17-detect-user-idle.py 2024-07-01 18:17:43 -07:00
Aleix Conchillo Flaqué
535514f506 processors: added new UserIdleProcessor 2024-07-01 18:17:43 -07:00
Aleix Conchillo Flaqué
933b63cf13 processors: added new IdleFrameProcessor 2024-07-01 14:57:42 -07:00
Aleix Conchillo Flaqué
d7c3e380a5 added BotSpeakingFrame 2024-07-01 14:57:18 -07:00
Aleix Conchillo Flaqué
c5298f78cb add more missing keyword-only arguments 2024-07-01 12:34:53 -07:00
Jon Taylor
4f8f7b8d1d added on_call_state event to prevent idle vms 2024-07-01 19:21:16 +01:00
Aleix Conchillo Flaqué
d7d46919ac update macos-py3.10-requirements.txt 2024-07-01 11:00:59 -07:00
Aleix Conchillo Flaqué
e5d73d2e2e update linux-py3.10-requirements.txt 2024-07-01 10:58:49 -07:00
Aleix Conchillo Flaqué
b145e8ec90 update README with XTTS 2024-07-01 10:49:43 -07:00
Aleix Conchillo Flaqué
97ff4a1fb8 Merge pull request #275 from pipecat-ai/aleix/add-missing-keyword-separators
add missing keyword separators
2024-07-01 10:45:31 -07:00
Aleix Conchillo Flaqué
5018a552c1 services(xtts): no need the WAV header 2024-07-01 10:44:32 -07:00
Aleix Conchillo Flaqué
7f9fd9ffce examples: added 07i-interruptible-xtts 2024-07-01 10:41:34 -07:00
Aleix Conchillo Flaqué
ddd0ca6a8f update CHANGELOG 2024-07-01 10:27:26 -07:00
Aleix Conchillo Flaqué
06f817c7e3 transport(websocket): don't send if serializer returns None 2024-07-01 10:27:26 -07:00
Aleix Conchillo Flaqué
df4c3e56c4 services: add missing * keyword separator 2024-07-01 10:27:26 -07:00
Aleix Conchillo Flaqué
9d5c2b9656 Merge pull request #276 from eddieoz/feature/xtts
Added service XTTS
2024-07-01 10:26:53 -07:00
eddieoz
7ce59c5e2e added service xtts 2024-07-01 20:17:19 +03:00
Aleix Conchillo Flaqué
1c9631fc78 Merge pull request #271 from pipecat-ai/aleix/silero-vad-version
vad(silero): allow specifying a Silero VAD version
2024-07-01 09:39:59 -07:00
Aleix Conchillo Flaqué
efbe7297f7 vad(silero): allow specifying a Silero VAD version 2024-07-01 09:38:43 -07:00
Aleix Conchillo Flaqué
1b45946a61 Merge pull request #270 from pipecat-ai/aleix/async-frame-processor
add new AsyncFrameProcessor and AsyncAIService
2024-07-01 09:37:51 -07:00
Aleix Conchillo Flaqué
cbf5a6362c add new AsyncFrameProcessor and AsyncAIService 2024-07-01 09:37:02 -07:00
Aleix Conchillo Flaqué
583b96c341 Merge pull request #269 from pipecat-ai/aleix/improve-error-handling
improve error handling and don't swallow exceptions
2024-07-01 09:36:00 -07:00
Aleix Conchillo Flaqué
fc0920504d improve error handling and don't swallow exceptions 2024-07-01 09:35:45 -07:00
Aleix Conchillo Flaqué
abd65a93b2 Merge pull request #268 from pipecat-ai/aleix/websocket-dont-send-if-closed
transports(websocket): don't send data if websocket closed
2024-07-01 09:33:45 -07:00
Aleix Conchillo Flaqué
c3244fdd7a transports(websocket): don't send data if websocket closed 2024-07-01 09:31:58 -07:00
Aleix Conchillo Flaqué
e8f58938b0 Merge pull request #267 from pipecat-ai/aleix/processing-metrics
add support for processing metrics
2024-07-01 09:31:05 -07:00
Jon Taylor
602b4f34b1 added example fly.toml 2024-07-01 16:50:53 +01:00
Jon Taylor
0399c84dfa added flyio deployment example 2024-07-01 16:46:38 +01:00
Aleix Conchillo Flaqué
fd5d879bf5 add support for processing metrics
Processing metrics indicate how much time a processor takes to generate all of
its output.
2024-06-28 14:26:57 -07:00
Aleix Conchillo Flaqué
8dff460307 Merge pull request #266 from pipecat-ai/aleix/silero-num-frames-fixes
vad: fix Silero VAD required number of frames
2024-06-28 11:25:55 -07:00
Aleix Conchillo Flaqué
cce1ddb183 vad: fix Silero VAD required number of frames 2024-06-28 10:45:48 -07:00
Aleix Conchillo Flaqué
8691d14289 Merge pull request #255 from Viking5274/main
Fix twilio error
2024-06-26 10:17:03 -07:00
daniil5701133
dd402da9e5 added handling streamSid after first wss connect
fixx name
2024-06-26 18:56:30 +03:00
Aleix Conchillo Flaqué
2fd04248f1 examples(storytelling-chatbot): upgrade npm vulnerabilities 2024-06-25 22:04:55 -07:00
Aleix Conchillo Flaqué
0ac42006f8 Merge pull request #260 from pipecat-ai/aleix/more-interruption-fixes
more interruption fixes
2024-06-25 21:52:02 -07:00
Aleix Conchillo Flaqué
66e331248d update CHANGELOG for 0.0.34 2024-06-25 21:43:23 -07:00
Aleix Conchillo Flaqué
4be3e8c87d aggregators: revert using intermediate results 2024-06-25 21:33:17 -07:00
Aleix Conchillo Flaqué
dac033fe61 services(azure): allow transcriptions during interruptions
If the user interrupts we can't just discard transcriptions because the user is
actually interrupting and talking.
2024-06-25 21:33:06 -07:00
Aleix Conchillo Flaqué
d302cbb114 services(deepgram): allow transcriptions during interruptions
If the user interrupts we can't just discard transcriptions because the user is
actually interrupting and talking.
2024-06-25 21:32:21 -07:00
Aleix Conchillo Flaqué
e3b407db28 Merge pull request #259 from pipecat-ai/aleix/prepare-0.0.33
update CHANGELOG for 0.0.33
2024-06-25 12:05:07 -07:00
Aleix Conchillo Flaqué
4ef623f09e update CHANGELOG for 0.0.33 2024-06-25 11:53:07 -07:00
Aleix Conchillo Flaqué
253530a63d Merge pull request #258 from pipecat-ai/aleix/upgrade-cartesia-1.0.0
services(cartesia): upgrade to new cartesia 1.0.0
2024-06-25 11:52:04 -07:00
Aleix Conchillo Flaqué
4f38d989f5 services(cartesia): upgrade to new cartesia 1.0.0 2024-06-25 11:51:34 -07:00
Aleix Conchillo Flaqué
84074e90ee Merge pull request #257 from pipecat-ai/aleix/cancel-all-tasks-when-interrutpted
cancel all tasks when interrutpted
2024-06-25 11:16:00 -07:00
Aleix Conchillo Flaqué
38aee7d8f2 services(azure): cancel tasks when interrupted and ignore incoming transcriptions 2024-06-25 11:15:26 -07:00
Aleix Conchillo Flaqué
64198313c6 services(deepgram): cancel tasks when interrupted and ignore incoming transcriptions 2024-06-25 11:15:07 -07:00
Aleix Conchillo Flaqué
d61b6c301c transports(base_input): create push tasks after pushing interruption 2024-06-25 11:15:07 -07:00
Aleix Conchillo Flaqué
83d1931266 Merge pull request #256 from pipecat-ai/aleix/tts-cleanup-when-interrupted
services(tts): strip before TTS and cleanup when interrupted
2024-06-25 11:14:32 -07:00
Aleix Conchillo Flaqué
c31f2ab285 services(tts): strip before TTS and cleanup when interrupted 2024-06-25 11:13:19 -07:00
Aleix Conchillo Flaqué
0ddc5721b4 Merge pull request #252 from pipecat-ai/aleix/daily-check-size-read-audio-frames
transports(daily): always check size of read audio frames
2024-06-25 09:45:05 -07:00
Aleix Conchillo Flaqué
98bd183bc4 pyproject: fix cartesia version and update requirements files 2024-06-25 09:43:54 -07:00
Aleix Conchillo Flaqué
aaa154524c Merge pull request #253 from pipecat-ai/aleix/llm-response-use-intermediate-results
aggregators: uses intermediate results for LLMAssistantResponseAggreg…
2024-06-24 19:21:14 -07:00
Aleix Conchillo Flaqué
beced68337 aggregators: uses intermediate results for LLMAssistantResponseAggregator 2024-06-24 17:33:45 -07:00
Aleix Conchillo Flaqué
94823ab952 transports(daily): always check size of read audio frames 2024-06-24 14:56:24 -07:00
139 changed files with 11591 additions and 2523 deletions

View File

@@ -1,10 +1,6 @@
name: publish-test
on:
workflow_dispatch:
push:
branches:
- main
on: workflow_dispatch
jobs:
build:
@@ -14,7 +10,6 @@ jobs:
- name: Checkout repo
uses: actions/checkout@v4
with:
ref: ${{ github.event.inputs.gitref }}
fetch-tags: true
fetch-depth: 100
- name: Set up Python

View File

@@ -5,6 +5,262 @@ All notable changes to **pipecat** will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Added
- `ElevenLabsTTSService` can now specify ElevenLabs input parameters such as
`output_format`.
- `TwilioFrameSerializer` can now specify Twilio's and Pipecat's desired sample
rates to use.
- Added new `on_participant_updated` event to `DailyTransport`.
- Added `DailyRESTHelper.delete_room_by_name()`.
- Added LLM and TTS usage metrics. Those will be enabled when
`enable_usage_metrics` is True.
- `AudioRawFrame`s are now pushed downstream from the base output
transport. This allows capturing the exact words the bot says by adding an STT
service at the end of the pipeline.
- Added new `GStreamerPipelineSource`. This processor can generate image or
audio frames from a GStreamer pipeline (e.g. reading an MP4 file, an RTP
stream or anything supported by GStreamer).
- Added `TransportParams.audio_out_is_live`. This flag is False by default and
it is useful to indicate we should not synchronize audio with sporadic images.
- Added new `BotStartedSpeakingFrame` and `BotStoppedSpeakingFrame` control
frames. These frames are pushed upstream and they should wrap
`BotSpeakingFrame`.
- Transports now allow you to register event handlers without decorators.
### Changed
- `ElevenLabsTTSService` now uses `eleven_turbo_v2_5` model by default.
- `BotSpeakingFrame` is now a control frame.
- `StartFrame` is now a control frame similar to `EndFrame`.
- `DeepgramTTSService` now is more customizable. You can adjust the encoding and
sample rate.
### Fixed
- Fixed an issue with `DailyRESTHelper.create_room()` expirations which would
cause this function to stop working after the initial expiration elapsed.
- Improved `EndFrame` and `CancelFrame` handling. `EndFrame` should end things
gracefully while a `CancelFrame` should cancel all running tasks as soon as
possible.
- Fixed an issue in `AIService` that would cause a yielded `None` value to be
processed.
- RTVI's `bot-ready` message is now sent when the RTVI pipeline is ready and
a first participant joins.
- Fixed a `BaseInputTransport` issue that was causing incoming system frames to
be queued instead of being pushed immediately.
- Fixed a `BaseInputTransport` issue that was causing incoming start/stop
interruption frames to not cancel tasks and be processed properly.
### Other
- Added examples `foundational/18-gstreamer-filesrc.py` and
`foundational/18a-gstreamer-videotestsrc.py` that show how to use
`GStreamerPipelineSource`
- Remove `requests` library usage.
- Cleanup examples and use `DailyRESTHelper`.
## [0.0.39] - 2024-07-23
### Fixed
- Fixed a regression introduced in 0.0.38 that would cause Daily transcription
to stop the Pipeline.
## [0.0.38] - 2024-07-23
### Added
- Added `force_reload`, `skip_validation` and `trust_repo` to `SileroVAD` and
`SileroVADAnalyzer`. This allows caching and various GitHub repo validations.
- Added `send_initial_empty_metrics` flag to `PipelineParams` to request for
initial empty metrics (zero values). True by default.
### Fixed
- Fixed initial metrics format. It was using the wrong keys name/time instead of
processor/value.
- STT services should be using ISO 8601 time format for transcription frames.
- Fixed an issue that would cause Daily transport to show a stop transcription
error when actually none occurred.
## [0.0.37] - 2024-07-22
### Added
- Added `RTVIProcessor` which implements the RTVI-AI standard.
See https://github.com/rtvi-ai
- Added `BotInterruptionFrame` which allows interrupting the bot while talking.
- Added `LLMMessagesAppendFrame` which allows appending messages to the current
LLM context.
- Added `LLMMessagesUpdateFrame` which allows changing the LLM context for the
one provided in this new frame.
- Added `LLMModelUpdateFrame` which allows updating the LLM model.
- Added `TTSSpeakFrame` which causes the bot to say some text. This text will not
be part of the LLM context.
- Added `TTSVoiceUpdateFrame` which allows updating the TTS voice.
### Removed
- We remove the `LLMResponseStartFrame` and `LLMResponseEndFrame` frames. These
were added in the past to properly handle interruptions for the
`LLMAssistantContextAggregator`. But the `LLMContextAggregator` is now based
on `LLMResponseAggregator` which handles interruptions properly by just
processing the `StartInterruptionFrame`, so there's no need for these extra
frames any more.
### Fixed
- Fixed an issue with `StatelessTextTransformer` where it was pushing a string
instead of a `TextFrame`.
- `TTSService` end of sentence detection has been improved. It now works with
acronyms, numbers, hours and others.
- Fixed an issue in `TTSService` that would not properly flush the current
aggregated sentence if an `LLMFullResponseEndFrame` was found.
### Performance
- `CartesiaTTSService` now uses websockets which improves speed. It also
leverages the new Cartesia contexts which maintain generated audio prosody
when multiple inputs are sent, therefore improving audio quality a lot.
## [0.0.36] - 2024-07-02
### Added
- Added `GladiaSTTService`.
See https://docs.gladia.io/chapters/speech-to-text-api/pages/live-speech-recognition
- Added `XTTSService`. This is a local Text-To-Speech service.
See https://github.com/coqui-ai/TTS
- Added `UserIdleProcessor`. This processor can be used to wait for any
interaction with the user. If the user doesn't say anything within a given
timeout a provided callback is called.
- Added `IdleFrameProcessor`. This processor can be used to wait for frames
within a given timeout. If no frame is received within the timeout a provided
callback is called.
- Added new frame `BotSpeakingFrame`. This frame will be continuously pushed
upstream while the bot is talking.
- It is now possible to specify a Silero VAD version when using `SileroVADAnalyzer`
or `SileroVAD`.
- Added `AsyncFrameProcessor` and `AsyncAIService`. Some services like
`DeepgramSTTService` need to process things asynchronously. For example, audio
is sent to Deepgram but transcriptions are not returned immediately. In these
cases we still require all frames (except system frames) to be pushed
downstream from a single task. That's what `AsyncFrameProcessor` is for. It
creates a task and all frames should be pushed from that task. So, whenever a
new Deepgram transcription is ready that transcription will also be pushed
from this internal task.
- The `MetricsFrame` now includes processing metrics if metrics are enabled. The
processing metrics indicate the time a processor needs to generate all its
output. Note that not all processors generate these kinds of metrics.
### Changed
- `WhisperSTTService` model can now also be a string.
- Added missing * keyword separators in services.
### Fixed
- `WebsocketServerTransport` doesn't try to send frames anymore if the serializer
returns `None`.
- Fixed an issue where exceptions that occurred inside frame processors were
being swallowed and not displayed.
- Fixed an issue in `FastAPIWebsocketTransport` where it would still try to send
data to the websocket after being closed.
### Other
- Added Fly.io deployment example in `examples/deployment/flyio-example`.
- Added new `17-detect-user-idle.py` example that shows how to use the new
`UserIdleProcessor`.
## [0.0.35] - 2024-06-28
### Changed
- `FastAPIWebsocketParams` now require a serializer.
- `TwilioFrameSerializer` now requires a `streamSid`.
### Fixed
- Silero VAD number of frames needs to be 512 for 16000 sample rate or 256 for
8000 sample rate.
## [0.0.34] - 2024-06-25
### Fixed
- Fixed an issue with asynchronous STT services (Deepgram and Azure) that could
cause interruptions to ignore transcriptions.
- Fixed an issue introduced in 0.0.33 that would cause the LLM to generate
shorter output.
## [0.0.33] - 2024-06-25
### Changed
- Upgraded to Cartesia's new Python library 1.0.0. `CartesiaTTSService` now
expects a voice ID instead of a voice name (you can get the voice ID from
Cartesia's playground). You can also specify the audio `sample_rate` and
`encoding` instead of the previous `output_format`.
### Fixed
- Fixed an issue with asynchronous STT services (Deepgram and Azure) that could
cause static audio issues and interruptions to not work properly when dealing
with multiple LLM sentences.
- Fixed an issue that could mix new LLM responses with previous ones when
handling interruptions.
- Fixed a Daily transport blocking situation that occurred while reading audio
frames after a participant left the room. Needs daily-python >= 0.10.1.
## [0.0.32] - 2024-06-22
### Added

View File

@@ -39,7 +39,7 @@ pip install "pipecat-ai[option,...]"
Your project may or may not need these, so they're made available as optional requirements. Here is a list:
- **AI services**: `anthropic`, `azure`, `deepgram`, `google`, `fal`, `moondream`, `openai`, `openpipe`, `playht`, `silero`, `whisper`
- **AI services**: `anthropic`, `azure`, `deepgram`, `gladia`, `google`, `fal`, `moondream`, `openai`, `openpipe`, `playht`, `silero`, `whisper`, `xtts`
- **Transports**: `local`, `websocket`, `daily`
## Code examples
@@ -70,8 +70,8 @@ async def main():
transport = DailyTransport(
room_url=...,
token=...,
"Bot Name",
DailyParams(audio_out_enabled=True))
bot_name="Bot Name",
params=DailyParams(audio_out_enabled=True))
# Use Eleven Labs for Text-to-Speech
tts = ElevenLabsTTSService(
@@ -125,7 +125,7 @@ Sign up [here](https://dashboard.daily.co/u/signup) and [create a room](https://
Voice Activity Detection &mdash; very important for knowing when a user has finished speaking to your bot. If you are not using press-to-talk, and want Pipecat to detect when the user has finished talking, VAD is an essential component for a natural feeling conversation.
Pipecast makes use of WebRTC VAD by default when using a WebRTC transport layer. Optionally, you can use Silero VAD for improved accuracy at the cost of higher CPU usage.
Pipecat makes use of WebRTC VAD by default when using a WebRTC transport layer. Optionally, you can use Silero VAD for improved accuracy at the cost of higher CPU usage.
```shell
pip install pipecat-ai[silero]

View File

@@ -4,5 +4,5 @@ grpcio-tools~=1.62.2
pip-tools~=7.4.1
pyright~=1.1.367
pytest~=8.2.0
setuptools~=69.5.1
setuptools~=71.1.0
setuptools_scm~=8.1.0

View File

@@ -27,6 +27,9 @@ FAL_KEY=...
# Fireworks
FIREWORKS_API_KEY=...
# Gladia
GLADIA_API_KEY=...
# PlayHT
PLAY_HT_USER_ID=...
PLAY_HT_API_KEY=...

View File

@@ -0,0 +1,13 @@
FROM python:3.11-bullseye
# Open port 7860 for http service
ENV FAST_API_PORT=7860
EXPOSE 7860
# Install Python dependencies
COPY *.py .
COPY ./requirements.txt requirements.txt
RUN pip3 install --no-cache-dir --upgrade -r requirements.txt
# Start the FastAPI server
CMD python3 bot_runner.py --port ${FAST_API_PORT}

View File

@@ -0,0 +1,39 @@
# Fly.io deployment example
This project modifies the `bot_runner.py` server to launch a new machine for each user session. This is a recommended approach for production vs. running shell processes, as your deployment will quickly run out of system resources under load.
For this example, we are using Daily as a WebRTC transport and provisioning a new room and token for each session. You can use another transport, such as WebSockets, by modifying the `bot.py` and `bot_runner.py` files accordingly.
## Setting up your fly.io deployment
### Create your fly.toml file
You can copy the `example-fly.toml` as a reference. Be sure to change the app name to something unique.
### Create your .env file
Copy the base `env.example` to `.env` and enter the necessary API keys.
`FLY_APP_NAME` should match that in the `fly.toml` file.
### Launch a new fly.io project
`fly launch` or `fly launch --org your-org-name`
### Set the necessary app secrets from your .env
Note: you can do this manually via the fly.io dashboard under the "secrets" sub-section of your deployment (e.g. "https://fly.io/apps/fly-app-name/secrets") or run the following terminal command:
`cat .env | tr '\n' ' ' | xargs flyctl secrets set`
### Deploy your machine
`fly deploy`
## Connecting to your bot
Send a post request to your running fly.io instance:
`curl --location --request POST 'https://YOUR_FLY_APP_NAME/start_bot'`
This request will wait until the machine enters the `started` state before returning a room URL and token to join.

View File

@@ -0,0 +1,103 @@
import asyncio
import aiohttp
import os
import sys
import argparse
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_response import LLMAssistantResponseAggregator, LLMUserResponseAggregator
from pipecat.frames.frames import LLMMessagesFrame, EndFrame
from pipecat.services.openai import OpenAILLMService
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from pipecat.vad.silero import SileroVADAnalyzer
from loguru import logger
from dotenv import load_dotenv
# Load variables from a local .env file; with override=True the values from
# .env replace any already present in the process environment.
load_dotenv(override=True)
# Swap loguru's default handler (id 0) for a stderr sink at DEBUG level.
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
# Daily REST settings, read once at import time. The key falls back to an empty
# string and the URL to Daily's public v1 endpoint.
daily_api_key = os.getenv("DAILY_API_KEY", "")
daily_api_url = os.getenv("DAILY_API_URL", "https://api.daily.co/v1")
async def main(room_url: str, token: str):
    """Join a Daily room as "Chatbot" and run the voice pipeline.

    Builds a Daily transport, an ElevenLabs TTS service and an OpenAI LLM
    service, wires them into a pipeline and runs it with a ``PipelineRunner``.
    An ``EndFrame`` is queued when a participant leaves or the call state
    becomes ``"left"``.

    Args:
        room_url: URL of the Daily room to join.
        token: Meeting token used to join the room.
    """
    # Conversation context handed to both aggregators and to the first
    # LLMMessagesFrame that kicks off the conversation.
    messages = [
        {
            "role": "system",
            "content": "You are Chatbot, a friendly, helpful robot. Your output will be converted to audio so don't include special characters other than '!' or '?' in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by saying hello.",
        },
    ]

    async with aiohttp.ClientSession() as session:
        daily_params = DailyParams(
            api_url=daily_api_url,
            api_key=daily_api_key,
            audio_in_enabled=True,
            audio_out_enabled=True,
            camera_out_enabled=False,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
            transcription_enabled=True,
        )
        transport = DailyTransport(room_url, token, "Chatbot", daily_params)

        tts = ElevenLabsTTSService(
            aiohttp_session=session,
            api_key=os.getenv("ELEVENLABS_API_KEY", ""),
            voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
        )

        llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

        user_aggregator = LLMUserResponseAggregator(messages)
        assistant_aggregator = LLMAssistantResponseAggregator(messages)

        # Processors in the order frames flow through them.
        processors = [
            transport.input(),
            user_aggregator,
            llm,
            tts,
            transport.output(),
            assistant_aggregator,
        ]
        pipeline = Pipeline(processors)

        task_params = PipelineParams(allow_interruptions=True)
        task = PipelineTask(pipeline, task_params)

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            # Start transcribing the new participant, then send the initial
            # context to the LLM.
            transport.capture_participant_transcription(participant["id"])
            await task.queue_frames([LLMMessagesFrame(messages)])

        @transport.event_handler("on_participant_left")
        async def on_participant_left(transport, participant, reason):
            await task.queue_frame(EndFrame())

        @transport.event_handler("on_call_state_updated")
        async def on_call_state_updated(transport, state):
            if state != "left":
                return
            await task.queue_frame(EndFrame())

        await PipelineRunner().run(task)
if __name__ == "__main__":
    # Script entry point: parse the room URL and token from the command line
    # and run the bot. Both options are mandatory; marking them required makes
    # argparse exit with a usage error instead of handing None to main().
    parser = argparse.ArgumentParser(description="Pipecat Bot")
    parser.add_argument("-u", type=str, required=True, help="Room URL")
    parser.add_argument("-t", type=str, required=True, help="Token")
    config = parser.parse_args()

    asyncio.run(main(config.u, config.t))

View File

@@ -0,0 +1,215 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import argparse
import subprocess
import os
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pipecat.transports.services.helpers.daily_rest import (
DailyRESTHelper, DailyRoomObject, DailyRoomProperties, DailyRoomParams)
from dotenv import load_dotenv
load_dotenv(override=True)
# ------------ Configuration ------------ #

# Session length limit: 5 minutes, expressed in seconds.
MAX_SESSION_TIME = 60 * 5

# Environment variables the server checks for before starting.
REQUIRED_ENV_VARS = [
    "DAILY_API_KEY",
    "OPENAI_API_KEY",
    "ELEVENLABS_API_KEY",
    "ELEVENLABS_VOICE_ID",
    "FLY_API_KEY",
    "FLY_APP_NAME",
]

# Fly.io Machines API settings, read from the environment with fallbacks.
FLY_API_HOST = os.getenv("FLY_API_HOST", "https://api.machines.dev/v1")
FLY_APP_NAME = os.getenv("FLY_APP_NAME", "pipecat-fly-example")
FLY_API_KEY = os.getenv("FLY_API_KEY", "")

# Headers sent with every Fly API request.
FLY_HEADERS = {
    "Authorization": "Bearer " + FLY_API_KEY,
    "Content-Type": "application/json",
}

# Shared helper registry; the "rest" entry is filled in by the app lifespan.
daily_helpers = {}
# ----------------- API ----------------- #
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage the shared Daily REST helper for the lifetime of the app.

    On startup an aiohttp session is opened and a ``DailyRESTHelper`` using it
    is stored under ``daily_helpers["rest"]``. On shutdown the session is
    closed.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """
    # ``async with`` closes the session even if building the helper fails or
    # an exception is raised into the generator at the ``yield`` point, which
    # a plain close() after the yield would not do.
    async with aiohttp.ClientSession() as aiohttp_session:
        daily_helpers["rest"] = DailyRESTHelper(
            daily_api_key=os.getenv("DAILY_API_KEY", ""),
            daily_api_url=os.getenv("DAILY_API_URL", 'https://api.daily.co/v1'),
            aiohttp_session=aiohttp_session
        )
        yield
# Application instance; startup and shutdown work is delegated to `lifespan`.
app = FastAPI(lifespan=lifespan)
# CORS is configured fully open: any origin, method and header, with
# credentials allowed.
# NOTE(review): presumably acceptable for an example only; confirm before
# reusing in a real deployment.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"]
)
# ----------------- Main ----------------- #
async def spawn_fly_machine(room_url: str, token: str):
    """Start a new Fly.io machine that runs ``bot.py`` for one session.

    The image of the first machine listed for the app is reused, a new
    machine is created with the bot command line, and the call returns once
    the Fly API's wait endpoint reports the machine as ``started``.

    Args:
        room_url: Daily room URL passed to the bot via ``-u``.
        token: Daily meeting token passed to the bot via ``-t``.

    Raises:
        Exception: If a Fly API call returns a non-200 status, or the app has
            no machines to copy the image from.
    """
    machines_url = f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines"

    async with aiohttp.ClientSession() as session:
        # Use the same image as the bot runner
        async with session.get(machines_url, headers=FLY_HEADERS) as r:
            if r.status != 200:
                text = await r.text()
                raise Exception(f"Unable to get machine info from Fly: {text}")
            data = await r.json()

        # An empty list would otherwise surface as a bare IndexError.
        if not data:
            raise Exception(
                "Unable to get machine info from Fly: no machines found for app")
        image = data[0]['config']['image']

        # Machine configuration. The command is built as a list directly, so
        # each value stays a single argument.
        cmd = ["python3", "bot.py", "-u", room_url, "-t", token]
        worker_props = {
            "config": {
                "image": image,
                "auto_destroy": True,
                "init": {
                    "cmd": cmd
                },
                "restart": {
                    "policy": "no"
                },
                "guest": {
                    "cpu_kind": "shared",
                    "cpus": 1,
                    "memory_mb": 1024
                }
            },
        }

        # Spawn a new machine instance
        async with session.post(machines_url, headers=FLY_HEADERS, json=worker_props) as r:
            if r.status != 200:
                text = await r.text()
                raise Exception(f"Problem starting a bot worker: {text}")
            data = await r.json()

        # Wait for the machine to enter the started state
        vm_id = data['id']
        async with session.get(f"{machines_url}/{vm_id}/wait?state=started", headers=FLY_HEADERS) as r:
            if r.status != 200:
                text = await r.text()
                raise Exception(f"Bot was unable to enter started state: {text}")

    print(f"Machine joined room: {room_url}")
@app.post("/start_bot")
async def start_bot(request: Request) -> JSONResponse:
    """Provision a Daily room, launch a bot for it and return join details.

    The room is taken from ``DAILY_SAMPLE_ROOM_URL`` when set, otherwise a new
    one is created. The bot is started on a new Fly.io machine, or as a local
    subprocess when ``RUN_AS_PROCESS`` is enabled.

    Args:
        request: Incoming request. A JSON body containing ``"test"`` is
            answered with ``{"test": true}`` and nothing is provisioned.

    Returns:
        JSON with ``room_url`` and a user ``token`` for joining the room.

    Raises:
        HTTPException: 500 if the room cannot be created or found, no token is
            returned, or the bot fails to start.
    """
    # The body is optional: a missing or non-JSON body is deliberately
    # ignored and the request proceeds as a normal start request.
    try:
        data = await request.json()
        # Is this a webhook creation request?
        if "test" in data:
            return JSONResponse({"test": True})
    except Exception:
        pass

    # Use specified room URL, or create a new one if not specified
    room_url = os.getenv("DAILY_SAMPLE_ROOM_URL", "")

    if not room_url:
        params = DailyRoomParams(
            properties=DailyRoomProperties()
        )
        try:
            room: DailyRoomObject = await daily_helpers["rest"].create_room(params=params)
        except Exception as e:
            raise HTTPException(
                status_code=500,
                detail=f"Unable to provision room {e}") from e
    else:
        # Check that the passed room URL exists; it is assumed to already have
        # SIP set up.
        try:
            room: DailyRoomObject = await daily_helpers["rest"].get_room_from_url(room_url)
        except Exception as e:
            raise HTTPException(
                status_code=500, detail=f"Room not found: {room_url}") from e

    # Give the agent a token to join the session
    token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)

    if not token:
        # Report room.url: room_url is empty when the room was just created.
        raise HTTPException(
            status_code=500, detail=f"Failed to get token for room: {room.url}")

    # Launch a new fly.io machine, or run as a shell process (not recommended).
    # Environment values are strings, so "false"/"0" must be read as disabled;
    # any other non-empty value still enables process mode.
    run_as_process = os.getenv("RUN_AS_PROCESS", "").strip().lower() not in (
        "", "0", "false", "no", "off")

    if run_as_process:
        try:
            # Argument list without a shell: the room URL and token are passed
            # verbatim instead of being interpolated into a shell command.
            subprocess.Popen(
                ["python3", "-m", "bot", "-u", room.url, "-t", token],
                bufsize=1,
                cwd=os.path.dirname(os.path.abspath(__file__)))
        except Exception as e:
            raise HTTPException(
                status_code=500, detail=f"Failed to start subprocess: {e}") from e
    else:
        try:
            await spawn_fly_machine(room.url, token)
        except Exception as e:
            raise HTTPException(
                status_code=500, detail=f"Failed to spawn VM: {e}") from e

    # Grab a token for the user to join with
    user_token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)

    return JSONResponse({
        "room_url": room.url,
        "token": user_token,
    })
if __name__ == "__main__":
    # Check environment variables. A blank value (e.g. a key left empty in
    # .env) is as unusable as an absent one, so both count as missing, and all
    # offenders are reported together rather than one per run.
    missing = [env_var for env_var in REQUIRED_ENV_VARS if not os.getenv(env_var)]
    if missing:
        raise Exception(f"Missing environment variable: {', '.join(missing)}.")

    parser = argparse.ArgumentParser(description="Pipecat Bot Runner")
    parser.add_argument("--host", type=str,
                        default=os.getenv("HOST", "0.0.0.0"), help="Host address")
    # A string default taken from PORT is still converted by argparse via type=int.
    parser.add_argument("--port", type=int,
                        default=os.getenv("PORT", 7860), help="Port number")
    parser.add_argument("--reload", action="store_true",
                        default=False, help="Reload code on change")

    config = parser.parse_args()

    try:
        # Imported here so uvicorn is only needed when run as a script.
        import uvicorn

        uvicorn.run(
            "bot_runner:app",
            host=config.host,
            port=config.port,
            reload=config.reload
        )
    except KeyboardInterrupt:
        print("Pipecat runner shutting down...")

View File

@@ -0,0 +1,8 @@
DAILY_API_KEY=
DAILY_SAMPLE_ROOM_URL= # Enter a Daily room URL to use a set room URL each time (useful for local testing)
OPENAI_API_KEY=
ELEVENLABS_API_KEY=
ELEVENLABS_VOICE_ID=
FLY_API_KEY=
FLY_APP_NAME=
RUN_AS_PROCESS= # Spawn fly.io machine for each session or run as local process

View File

@@ -0,0 +1,25 @@
# fly.toml app configuration file generated for pipecat-fly-example on 2024-07-01T15:04:53+01:00
#
# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
#
app = 'pipecat-fly-example'
primary_region = 'sjc'
[build]
[env]
FLY_APP_NAME = 'pipecat-fly-example'
[http_service]
internal_port = 7860
force_https = true
auto_stop_machines = true
auto_start_machines = true
min_machines_running = 0
processes = ['app']
[[vm]]
memory = 512
cpu_kind = 'shared'
cpus = 1

View File

@@ -0,0 +1,5 @@
pipecat-ai[daily,openai,silero]
fastapi
uvicorn
python-dotenv
loguru

View File

@@ -6,15 +6,27 @@ provisioning a room and starting a Pipecat bot in response.
Refer to README for more information.
"""
import aiohttp
import os
import argparse
import subprocess
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomObject, DailyRoomProperties, DailyRoomSipParams, DailyRoomParams
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, PlainTextResponse
from twilio.twiml.voice_response import VoiceResponse
from pipecat.transports.services.helpers.daily_rest import (
DailyRESTHelper,
DailyRoomObject,
DailyRoomProperties,
DailyRoomSipParams,
DailyRoomParams)
from dotenv import load_dotenv
load_dotenv(override=True)
@@ -25,14 +37,23 @@ MAX_SESSION_TIME = 5 * 60 # 5 minutes
REQUIRED_ENV_VARS = ['OPENAI_API_KEY', 'DAILY_API_KEY',
'ELEVENLABS_API_KEY', 'ELEVENLABS_VOICE_ID']
daily_rest_helper = DailyRESTHelper(
os.getenv("DAILY_API_KEY", ""),
os.getenv("DAILY_API_URL", 'https://api.daily.co/v1'))
daily_helpers = {}
# ----------------- API ----------------- #
app = FastAPI()
@asynccontextmanager
async def lifespan(app: FastAPI):
aiohttp_session = aiohttp.ClientSession()
daily_helpers["rest"] = DailyRESTHelper(
daily_api_key=os.getenv("DAILY_API_KEY", ""),
daily_api_url=os.getenv("DAILY_API_URL", 'https://api.daily.co/v1'),
aiohttp_session=aiohttp_session
)
yield
await aiohttp_session.close()
app = FastAPI(lifespan=lifespan)
app.add_middleware(
CORSMiddleware,
@@ -53,7 +74,7 @@ action using the Twilio Client library.
"""
def _create_daily_room(room_url, callId, callDomain=None, vendor="daily"):
async def _create_daily_room(room_url, callId, callDomain=None, vendor="daily"):
if not room_url:
params = DailyRoomParams(
properties=DailyRoomProperties(
@@ -68,14 +89,13 @@ def _create_daily_room(room_url, callId, callDomain=None, vendor="daily"):
)
print(f"Creating new room...")
room: DailyRoomObject = daily_rest_helper.create_room(params=params)
room: DailyRoomObject = await daily_helpers["rest"].create_room(params=params)
else:
# Check passed room URL exist (we assume that it already has a sip set up!)
try:
print(f"Joining existing room: {room_url}")
room: DailyRoomObject = daily_rest_helper.get_room_from_url(
room_url)
room: DailyRoomObject = await daily_helpers["rest"].get_room_from_url(room_url)
except Exception:
raise HTTPException(
status_code=500, detail=f"Room not found: {room_url}")
@@ -83,7 +103,7 @@ def _create_daily_room(room_url, callId, callDomain=None, vendor="daily"):
print(f"Daily room: {room.url} {room.config.sip_endpoint}")
# Give the agent a token to join the session
token = daily_rest_helper.get_token(room.url, MAX_SESSION_TIME)
token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)
if not room or not token:
raise HTTPException(
@@ -92,11 +112,11 @@ def _create_daily_room(room_url, callId, callDomain=None, vendor="daily"):
# Spawn a new agent, and join the user session
# Note: this is mostly for demonstration purposes (refer to 'deployment' in docs)
if vendor == "daily":
bot_proc = f"python3 -m bot_daily -u {room.url} -t {token} -i {
callId} -d {callDomain}"
bot_proc = f"python3 - m bot_daily - u {room.url} - t {token} - i {
callId} - d {callDomain}"
else:
bot_proc = f"python3 -m bot_twilio -u {room.url} -t {
token} -i {callId} -s {room.config.sip_endpoint}"
bot_proc = f"python3 - m bot_twilio - u {room.url} - t {
token} - i {callId} - s {room.config.sip_endpoint}"
try:
subprocess.Popen(
@@ -140,8 +160,7 @@ async def twilio_start_bot(request: Request):
# create room and tell the bot to join the created room
# note: Twilio does not require a callDomain
room: DailyRoomObject = _create_daily_room(
room_url, callId, None, "twilio")
room: DailyRoomObject = await _create_daily_room(room_url, callId, None, "twilio")
print(f"Put Twilio on hold...")
# We have the room and the SIP URI,
@@ -178,8 +197,7 @@ async def daily_start_bot(request: Request) -> JSONResponse:
detail="Missing properties 'callId' or 'callDomain'")
print(f"CallId: {callId}, CallDomain: {callDomain}")
room: DailyRoomObject = _create_daily_room(
room_url, callId, callDomain, "daily")
room: DailyRoomObject = await _create_daily_room(room_url, callId, callDomain, "daily")
# Grab a token for the user to join with
return JSONResponse({

View File

@@ -1,7 +1,5 @@
pipecat-ai[daily,openai,silero]
fastapi
uvicorn
requests
python-dotenv
loguru
twilio
twilio

View File

@@ -27,8 +27,10 @@ logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main(room_url):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, _) = await configure(session)
transport = DailyTransport(
room_url, None, "Say One Thing", DailyParams(audio_out_enabled=True))
@@ -52,5 +54,4 @@ async def main(room_url):
await runner.run(task)
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url))
asyncio.run(main())

View File

@@ -28,8 +28,10 @@ logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main(room_url):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, _) = await configure(session)
transport = DailyTransport(
room_url,
None,
@@ -64,5 +66,4 @@ async def main(room_url):
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url))
asyncio.run(main())

View File

@@ -27,8 +27,10 @@ logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main(room_url):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, _) = await configure(session)
transport = DailyTransport(
room_url,
None,
@@ -64,5 +66,4 @@ async def main(room_url):
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url))
asyncio.run(main())

View File

@@ -30,8 +30,10 @@ logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main(room_url: str):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, _) = await configure(session)
transport = DailyTransport(room_url, None, "Static And Dynamic Speech")
meeting = TransportServiceOutput(transport, mic_enabled=True)
@@ -82,5 +84,4 @@ async def main(room_url: str):
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url))
asyncio.run(main())

View File

@@ -73,8 +73,10 @@ class MonthPrepender(FrameProcessor):
await self.push_frame(frame, direction)
async def main(room_url):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, _) = await configure(session)
transport = DailyTransport(
room_url,
None,
@@ -162,5 +164,4 @@ async def main(room_url):
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url))
asyncio.run(main())

View File

@@ -9,14 +9,15 @@ import aiohttp
import os
import sys
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.frames.frames import Frame, LLMMessagesFrame, MetricsFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_response import (
LLMAssistantResponseAggregator,
LLMUserResponseAggregator,
)
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.processors.logger import FrameLogger
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.services.openai import OpenAILLMService
@@ -34,8 +35,18 @@ logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main(room_url: str, token):
class MetricsLogger(FrameProcessor):
async def process_frame(self, frame: Frame, direction: FrameDirection):
if isinstance(frame, MetricsFrame):
print(
f"!!! MetricsFrame: {frame}, ttfb: {frame.ttfb}, processing: {frame.processing}, tokens: {frame.tokens}, characters: {frame.characters}")
await self.push_frame(frame, direction)
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
@@ -56,11 +67,10 @@ async def main(room_url: str, token):
llm = OpenAILLMService(
api_key=os.getenv("OPENAI_API_KEY"),
model="gpt-4o")
model="gpt-4o"
)
fl = FrameLogger("!!! after LLM", "red")
fltts = FrameLogger("@@@ out of tts", "green")
flend = FrameLogger("### out of the end", "magenta")
ml = MetricsLogger()
messages = [
{
@@ -75,15 +85,18 @@ async def main(room_url: str, token):
transport.input(),
tma_in,
llm,
fl,
tts,
fltts,
ml,
transport.output(),
tma_out,
flend
])
task = PipelineTask(pipeline)
task = PipelineTask(pipeline, PipelineParams(
allow_interruptions=True,
enable_metrics=True,
report_only_initial_ttfb=False,
))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
@@ -99,5 +112,4 @@ async def main(room_url: str, token):
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -51,7 +51,7 @@ class ImageSyncAggregator(FrameProcessor):
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)
if not isinstance(frame, SystemFrame):
if not isinstance(frame, SystemFrame) and direction == FrameDirection.DOWNSTREAM:
await self.push_frame(ImageRawFrame(image=self._speaking_image_bytes, size=(1024, 1024), format=self._speaking_image_format))
await self.push_frame(frame)
await self.push_frame(ImageRawFrame(image=self._waiting_image_bytes, size=(1024, 1024), format=self._waiting_image_format))
@@ -59,19 +59,22 @@ class ImageSyncAggregator(FrameProcessor):
await self.push_frame(frame)
async def main(room_url: str, token):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
"Respond bot",
DailyParams(
audio_out_enabled=True,
camera_out_enabled=True,
camera_out_width=1024,
camera_out_height=1024,
transcription_enabled=True,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer()
vad_analyzer=SileroVADAnalyzer(),
)
)
@@ -116,7 +119,7 @@ async def main(room_url: str, token):
async def on_first_participant_joined(transport, participant):
participant_name = participant["info"]["userName"] or ''
transport.capture_participant_transcription(participant["id"])
await task.queue_frames([TextFrame(f"Hi, this is {participant_name}.")])
await task.queue_frames([TextFrame(f"Hi there {participant_name}!")])
runner = PipelineRunner()
@@ -124,5 +127,4 @@ async def main(room_url: str, token):
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -31,8 +31,10 @@ logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main(room_url: str, token):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
@@ -77,6 +79,7 @@ async def main(room_url: str, token):
task = PipelineTask(pipeline, PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
))
@@ -94,5 +97,4 @@ async def main(room_url: str, token):
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -31,8 +31,10 @@ logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main(room_url: str, token):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
@@ -91,5 +93,4 @@ async def main(room_url: str, token):
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -47,8 +47,10 @@ def get_session_history(session_id: str) -> BaseChatMessageHistory:
return message_store[session_id]
async def main(room_url: str, token):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
@@ -121,5 +123,4 @@ async def main(room_url: str, token):
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -31,8 +31,10 @@ logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main(room_url: str, token):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
@@ -93,5 +95,4 @@ async def main(room_url: str, token):
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -4,6 +4,7 @@
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import asyncio
import os
import sys
@@ -31,64 +32,66 @@ logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main(room_url: str, token):
transport = DailyTransport(
room_url,
token,
"Respond bot",
DailyParams(
audio_out_enabled=True,
audio_out_sample_rate=44100,
transcription_enabled=True,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer()
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
"Respond bot",
DailyParams(
audio_out_sample_rate=44100,
audio_out_enabled=True,
transcription_enabled=True,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer()
)
)
)
tts = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_name="British Lady",
output_format="pcm_44100"
)
tts = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="a0e99841-438c-4a64-b679-ae501e7d6091", # Barbershop Man
sample_rate=44100,
)
llm = OpenAILLMService(
api_key=os.getenv("OPENAI_API_KEY"),
model="gpt-4o")
llm = OpenAILLMService(
api_key=os.getenv("OPENAI_API_KEY"),
model="gpt-4o")
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
},
]
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
},
]
tma_in = LLMUserResponseAggregator(messages)
tma_out = LLMAssistantResponseAggregator(messages)
tma_in = LLMUserResponseAggregator(messages)
tma_out = LLMAssistantResponseAggregator(messages)
pipeline = Pipeline([
transport.input(), # Transport user input
tma_in, # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
tma_out # Assistant spoken responses
])
pipeline = Pipeline([
transport.input(), # Transport user input
tma_in, # User responses
llm, # LLM
tts, # TTS
tma_out, # Goes before the transport because cartesia has word-level timestamps!
transport.output(), # Transport bot output
])
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append(
{"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([LLMMessagesFrame(messages)])
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append(
{"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()
runner = PipelineRunner()
await runner.run(task)
await runner.run(task)
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -4,6 +4,7 @@
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import asyncio
import os
import sys
@@ -30,64 +31,66 @@ logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main(room_url: str, token):
transport = DailyTransport(
room_url,
token,
"Respond bot",
DailyParams(
audio_out_enabled=True,
audio_out_sample_rate=16000,
transcription_enabled=True,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer()
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
"Respond bot",
DailyParams(
audio_out_enabled=True,
audio_out_sample_rate=16000,
transcription_enabled=True,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer()
)
)
)
tts = PlayHTTTSService(
user_id=os.getenv("PLAYHT_USER_ID"),
api_key=os.getenv("PLAYHT_API_KEY"),
voice_url="s3://voice-cloning-zero-shot/801a663f-efd0-4254-98d0-5c175514c3e8/jennifer/manifest.json",
)
tts = PlayHTTTSService(
user_id=os.getenv("PLAYHT_USER_ID"),
api_key=os.getenv("PLAYHT_API_KEY"),
voice_url="s3://voice-cloning-zero-shot/801a663f-efd0-4254-98d0-5c175514c3e8/jennifer/manifest.json",
)
llm = OpenAILLMService(
api_key=os.getenv("OPENAI_API_KEY"),
model="gpt-4o")
llm = OpenAILLMService(
api_key=os.getenv("OPENAI_API_KEY"),
model="gpt-4o")
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
},
]
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
},
]
tma_in = LLMUserResponseAggregator(messages)
tma_out = LLMAssistantResponseAggregator(messages)
tma_in = LLMUserResponseAggregator(messages)
tma_out = LLMAssistantResponseAggregator(messages)
pipeline = Pipeline([
transport.input(), # Transport user input
tma_in, # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
tma_out # Assistant spoken responses
])
pipeline = Pipeline([
transport.input(), # Transport user input
tma_in, # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
tma_out # Assistant spoken responses
])
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append(
{"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([LLMMessagesFrame(messages)])
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append(
{"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()
runner = PipelineRunner()
await runner.run(task)
await runner.run(task)
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -4,6 +4,7 @@
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import asyncio
import os
import sys
@@ -30,71 +31,73 @@ logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main(room_url: str, token):
transport = DailyTransport(
room_url,
token,
"Respond bot",
DailyParams(
audio_out_enabled=True,
audio_out_sample_rate=16000,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
vad_audio_passthrough=True,
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
"Respond bot",
DailyParams(
audio_out_enabled=True,
audio_out_sample_rate=16000,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
vad_audio_passthrough=True,
)
)
)
stt = AzureSTTService(
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
region=os.getenv("AZURE_SPEECH_REGION"),
)
stt = AzureSTTService(
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
region=os.getenv("AZURE_SPEECH_REGION"),
)
tts = AzureTTSService(
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
region=os.getenv("AZURE_SPEECH_REGION"),
)
tts = AzureTTSService(
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
region=os.getenv("AZURE_SPEECH_REGION"),
)
llm = AzureLLMService(
api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
model=os.getenv("AZURE_CHATGPT_MODEL"),
)
llm = AzureLLMService(
api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
model=os.getenv("AZURE_CHATGPT_MODEL"),
)
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
},
]
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
},
]
tma_in = LLMUserResponseAggregator(messages)
tma_out = LLMAssistantResponseAggregator(messages)
tma_in = LLMUserResponseAggregator(messages)
tma_out = LLMAssistantResponseAggregator(messages)
pipeline = Pipeline([
transport.input(), # Transport user input
stt, # STT
tma_in, # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
tma_out # Assistant spoken responses
])
pipeline = Pipeline([
transport.input(), # Transport user input
stt, # STT
tma_in, # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
tma_out # Assistant spoken responses
])
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append(
{"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([LLMMessagesFrame(messages)])
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append(
{"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()
runner = PipelineRunner()
await runner.run(task)
await runner.run(task)
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -4,6 +4,7 @@
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import asyncio
import os
import sys
@@ -30,63 +31,65 @@ logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main(room_url: str, token):
transport = DailyTransport(
room_url,
token,
"Respond bot",
DailyParams(
audio_out_enabled=True,
audio_out_sample_rate=24000,
transcription_enabled=True,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer()
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
"Respond bot",
DailyParams(
audio_out_enabled=True,
audio_out_sample_rate=24000,
transcription_enabled=True,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer()
)
)
)
tts = OpenAITTSService(
api_key=os.getenv("OPENAI_API_KEY"),
voice="alloy"
)
tts = OpenAITTSService(
api_key=os.getenv("OPENAI_API_KEY"),
voice="alloy"
)
llm = OpenAILLMService(
api_key=os.getenv("OPENAI_API_KEY"),
model="gpt-4o")
llm = OpenAILLMService(
api_key=os.getenv("OPENAI_API_KEY"),
model="gpt-4o")
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
},
]
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
},
]
tma_in = LLMUserResponseAggregator(messages)
tma_out = LLMAssistantResponseAggregator(messages)
tma_in = LLMUserResponseAggregator(messages)
tma_out = LLMAssistantResponseAggregator(messages)
pipeline = Pipeline([
transport.input(), # Transport user input
tma_in, # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
tma_out # Assistant spoken responses
])
pipeline = Pipeline([
transport.input(), # Transport user input
tma_in, # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
tma_out # Assistant spoken responses
])
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append(
{"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([LLMMessagesFrame(messages)])
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append(
{"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()
runner = PipelineRunner()
await runner.run(task)
await runner.run(task)
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -34,8 +34,10 @@ logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main(room_url: str, token):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
@@ -98,5 +100,4 @@ async def main(room_url: str, token):
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -0,0 +1,97 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import aiohttp
import os
import sys
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_response import (
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.services.xtts import XTTSService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from pipecat.vad.silero import SileroVADAnalyzer
from runner import configure
from loguru import logger
from dotenv import load_dotenv
# Load settings from a .env file; override=True lets those values replace
# variables that are already set in the process environment.
load_dotenv(override=True)

# Replace the logger's pre-configured handler (id 0) with a stderr sink that
# emits everything from DEBUG level upwards.
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main():
    """Run the XTTS voice bot until its pipeline task finishes.

    Obtains the room URL and token from ``configure``, wires a Daily
    transport, an OpenAI LLM and an XTTS text-to-speech service into one
    pipeline, and asks the LLM to introduce itself when the first
    participant joins. The aiohttp session is shared with the TTS service
    and stays open for the whole run.
    """
    async with aiohttp.ClientSession() as session:
        room_url, token = await configure(session)

        daily_params = DailyParams(
            audio_out_enabled=True,
            transcription_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
        )
        transport = DailyTransport(room_url, token, "Respond bot", daily_params)

        # The XTTS server is expected to be reachable at this local address.
        tts = XTTSService(
            aiohttp_session=session,
            voice_id="Claribel Dervla",
            language="en",
            base_url="http://localhost:8000",
        )

        llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

        system_prompt = {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
        }
        # One list object is shared by both aggregators and the join handler.
        messages = [system_prompt]

        user_aggregator = LLMUserResponseAggregator(messages)
        assistant_aggregator = LLMAssistantResponseAggregator(messages)

        stages = [
            transport.input(),     # Transport user input
            user_aggregator,       # User responses
            llm,                   # LLM
            tts,                   # TTS
            transport.output(),    # Transport bot output
            assistant_aggregator,  # Assistant spoken responses
        ]
        pipeline = Pipeline(stages)

        task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            transport.capture_participant_transcription(participant["id"])
            # Kick off the conversation.
            intro_request = {
                "role": "system",
                "content": "Please introduce yourself to the user.",
            }
            messages.append(intro_request)
            await task.queue_frames([LLMMessagesFrame(messages)])

        await PipelineRunner().run(task)
# Script entry point: run main() to completion on a fresh asyncio event loop.
if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,102 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import aiohttp
import os
import sys
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_response import (
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
from pipecat.services.gladia import GladiaSTTService
from pipecat.services.openai import OpenAILLMService
from pipecat.services.xtts import XTTSService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from pipecat.vad.silero import SileroVADAnalyzer
from runner import configure
from loguru import logger
from dotenv import load_dotenv
# Load settings from a .env file; override=True lets those values replace
# variables that are already set in the process environment.
load_dotenv(override=True)

# Replace the logger's pre-configured handler (id 0) with a stderr sink that
# emits everything from DEBUG level upwards.
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main():
    """Run the Gladia STT / Deepgram TTS voice bot until its task finishes.

    Obtains the room URL and token from ``configure``, then builds a
    pipeline of Daily transport input, Gladia speech-to-text, an OpenAI LLM,
    Deepgram text-to-speech and Daily transport output. The LLM is asked to
    introduce itself when the first participant joins. The aiohttp session
    is shared with the TTS service and stays open for the whole run.
    """
    async with aiohttp.ClientSession() as session:
        room_url, token = await configure(session)

        daily_params = DailyParams(
            audio_out_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
            vad_audio_passthrough=True,
        )
        transport = DailyTransport(room_url, token, "Respond bot", daily_params)

        stt = GladiaSTTService(api_key=os.getenv("GLADIA_API_KEY"))

        tts = DeepgramTTSService(
            aiohttp_session=session,
            api_key=os.getenv("DEEPGRAM_API_KEY"),
            voice="aura-helios-en",
        )

        llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

        system_prompt = {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
        }
        # One list object is shared by both aggregators and the join handler.
        messages = [system_prompt]

        user_aggregator = LLMUserResponseAggregator(messages)
        assistant_aggregator = LLMAssistantResponseAggregator(messages)

        stages = [
            transport.input(),     # Transport user input
            stt,                   # STT
            user_aggregator,       # User responses
            llm,                   # LLM
            tts,                   # TTS
            transport.output(),    # Transport bot output
            assistant_aggregator,  # Assistant spoken responses
        ]
        pipeline = Pipeline(stages)

        task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            # NOTE(review): DailyParams above does not set transcription_enabled
            # and a separate STT stage is in the pipeline; confirm this call is
            # still wanted here.
            transport.capture_participant_transcription(participant["id"])
            # Kick off the conversation.
            intro_request = {
                "role": "system",
                "content": "Please introduce yourself to the user.",
            }
            messages.append(intro_request)
            await task.queue_frames([LLMMessagesFrame(messages)])

        await PipelineRunner().run(task)
# Script entry point: run main() to completion on a fresh asyncio event loop.
if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -22,8 +22,10 @@ logger = logging.getLogger("pipecat")
logger.setLevel(logging.DEBUG)
async def main(room_url: str):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, _) = await configure(session)
transport = DailyTransport(
room_url,
None,
@@ -144,5 +146,4 @@ async def main(room_url: str):
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url))
asyncio.run(main())

View File

@@ -4,6 +4,7 @@
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import asyncio
import sys
@@ -23,32 +24,34 @@ logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main(room_url, token):
transport = DailyTransport(
room_url, token, "Test",
DailyParams(
audio_in_enabled=True,
audio_out_enabled=True,
camera_out_enabled=True,
camera_out_is_live=True,
camera_out_width=1280,
camera_out_height=720
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url, token, "Test",
DailyParams(
audio_in_enabled=True,
audio_out_enabled=True,
camera_out_enabled=True,
camera_out_is_live=True,
camera_out_width=1280,
camera_out_height=720
)
)
)
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
transport.capture_participant_video(participant["id"])
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
transport.capture_participant_video(participant["id"])
pipeline = Pipeline([transport.input(), transport.output()])
pipeline = Pipeline([transport.input(), transport.output()])
runner = PipelineRunner()
runner = PipelineRunner()
task = PipelineTask(pipeline)
task = PipelineTask(pipeline)
await runner.run(task)
await runner.run(task)
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -4,6 +4,7 @@
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import asyncio
import sys
@@ -27,40 +28,44 @@ logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main(room_url, token):
tk_root = tk.Tk()
tk_root.title("Local Mirror")
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
daily_transport = DailyTransport(room_url, token, "Test", DailyParams(audio_in_enabled=True))
tk_root = tk.Tk()
tk_root.title("Local Mirror")
tk_transport = TkLocalTransport(
tk_root,
TransportParams(
audio_out_enabled=True,
camera_out_enabled=True,
camera_out_is_live=True,
camera_out_width=1280,
camera_out_height=720))
daily_transport = DailyTransport(
room_url, token, "Test", DailyParams(
audio_in_enabled=True))
@daily_transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
transport.capture_participant_video(participant["id"])
tk_transport = TkLocalTransport(
tk_root,
TransportParams(
audio_out_enabled=True,
camera_out_enabled=True,
camera_out_is_live=True,
camera_out_width=1280,
camera_out_height=720))
pipeline = Pipeline([daily_transport.input(), tk_transport.output()])
@daily_transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
transport.capture_participant_video(participant["id"])
task = PipelineTask(pipeline)
pipeline = Pipeline([daily_transport.input(), tk_transport.output()])
async def run_tk():
while not task.has_finished():
tk_root.update()
tk_root.update_idletasks()
await asyncio.sleep(0.1)
task = PipelineTask(pipeline)
runner = PipelineRunner()
async def run_tk():
while not task.has_finished():
tk_root.update()
tk_root.update_idletasks()
await asyncio.sleep(0.1)
await asyncio.gather(runner.run(task), run_tk())
runner = PipelineRunner()
await asyncio.gather(runner.run(task), run_tk())
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -31,9 +31,10 @@ logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main(room_url: str, token):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
@@ -90,5 +91,4 @@ async def main(room_url: str, token):
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -83,8 +83,10 @@ class InboundSoundEffectWrapper(FrameProcessor):
await self.push_frame(frame, direction)
async def main(room_url: str, token):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
@@ -148,5 +150,4 @@ async def main(room_url: str, token):
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -49,8 +49,10 @@ class UserImageRequester(FrameProcessor):
await self.push_frame(frame, direction)
async def main(room_url: str, token):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
@@ -108,5 +110,4 @@ async def main(room_url: str, token):
await runner.run(task)
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -49,8 +49,10 @@ class UserImageRequester(FrameProcessor):
await self.push_frame(frame, direction)
async def main(room_url: str, token):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
@@ -104,5 +106,4 @@ async def main(room_url: str, token):
await runner.run(task)
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -49,8 +49,10 @@ class UserImageRequester(FrameProcessor):
await self.push_frame(frame, direction)
async def main(room_url: str, token):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
@@ -104,5 +106,4 @@ async def main(room_url: str, token):
await runner.run(task)
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -49,8 +49,10 @@ class UserImageRequester(FrameProcessor):
await self.push_frame(frame, direction)
async def main(room_url: str, token):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
@@ -104,5 +106,4 @@ async def main(room_url: str, token):
await runner.run(task)
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -4,6 +4,7 @@
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import asyncio
import sys
@@ -35,23 +36,25 @@ class TranscriptionLogger(FrameProcessor):
print(f"Transcription: {frame.text}")
async def main(room_url: str):
transport = DailyTransport(room_url, None, "Transcription bot",
DailyParams(audio_in_enabled=True))
async def main():
async with aiohttp.ClientSession() as session:
(room_url, _) = await configure(session)
stt = WhisperSTTService()
transport = DailyTransport(room_url, None, "Transcription bot",
DailyParams(audio_in_enabled=True))
tl = TranscriptionLogger()
stt = WhisperSTTService()
pipeline = Pipeline([transport.input(), stt, tl])
tl = TranscriptionLogger()
task = PipelineTask(pipeline)
pipeline = Pipeline([transport.input(), stt, tl])
runner = PipelineRunner()
task = PipelineTask(pipeline)
await runner.run(task)
runner = PipelineRunner()
await runner.run(task)
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url))
asyncio.run(main())

View File

@@ -4,6 +4,7 @@
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import asyncio
import os
import sys
@@ -36,23 +37,25 @@ class TranscriptionLogger(FrameProcessor):
print(f"Transcription: {frame.text}")
async def main(room_url: str):
transport = DailyTransport(room_url, None, "Transcription bot",
DailyParams(audio_in_enabled=True))
async def main():
async with aiohttp.ClientSession() as session:
(room_url, _) = await configure(session)
stt = DeepgramSTTService(os.getenv("DEEPGRAM_API_KEY"))
transport = DailyTransport(room_url, None, "Transcription bot",
DailyParams(audio_in_enabled=True))
tl = TranscriptionLogger()
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
pipeline = Pipeline([transport.input(), stt, tl])
tl = TranscriptionLogger()
task = PipelineTask(pipeline)
pipeline = Pipeline([transport.input(), stt, tl])
runner = PipelineRunner()
task = PipelineTask(pipeline)
await runner.run(task)
runner = PipelineRunner()
await runner.run(task)
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url))
asyncio.run(main())

View File

@@ -44,8 +44,10 @@ async def fetch_weather_from_api(llm, args):
return {"conditions": "nice", "temperature": "75"}
async def main(room_url: str, token):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
@@ -124,7 +126,7 @@ async def main(room_url: str, token):
task = PipelineTask(pipeline)
@ transport.event_handler("on_first_participant_joined")
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
@@ -134,7 +136,5 @@ async def main(room_url: str, token):
await runner.run(task)
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -4,8 +4,8 @@
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import aiohttp
import asyncio
import os
import sys
@@ -58,15 +58,16 @@ async def barbershop_man_filter(frame) -> bool:
return current_voice == "Barbershop Man"
async def main(room_url: str, token):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
"Pipecat",
DailyParams(
audio_out_enabled=True,
audio_out_sample_rate=44100,
transcription_enabled=True,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer()
@@ -75,20 +76,17 @@ async def main(room_url: str, token):
news_lady = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_name="Newslady",
output_format="pcm_44100"
voice_id="bf991597-6c13-47e4-8411-91ec2de5c466", # Newslady
)
british_lady = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_name="British Lady",
output_format="pcm_44100"
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
)
barbershop_man = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_name="Barbershop Man",
output_format="pcm_44100"
voice_id="a0e99841-438c-4a64-b679-ae501e7d6091", # Barbershop Man
)
llm = OpenAILLMService(
@@ -155,5 +153,4 @@ async def main(room_url: str, token):
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -55,8 +55,10 @@ async def spanish_filter(frame) -> bool:
return current_language == "Spanish"
async def main(room_url: str, token):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
@@ -149,5 +151,4 @@ async def main(room_url: str, token):
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -8,7 +8,6 @@ import asyncio
import aiohttp
import os
import sys
import json
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
@@ -32,8 +31,10 @@ logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main(room_url: str, token):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
@@ -126,5 +127,4 @@ async def main(room_url: str, token):
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -0,0 +1,109 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import aiohttp
import os
import sys
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_response import (
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
from pipecat.processors.frame_processor import FrameDirection
from pipecat.processors.user_idle_processor import UserIdleProcessor
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from pipecat.vad.silero import SileroVADAnalyzer
from runner import configure
from loguru import logger
from dotenv import load_dotenv
# Load settings from a .env file; override=True lets those values replace
# variables that are already set in the process environment.
load_dotenv(override=True)

# Replace the logger's pre-configured handler (id 0) with a stderr sink that
# emits everything from DEBUG level upwards.
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main():
    """Run the idle-aware voice bot until its pipeline task finishes.

    Obtains the room URL and token from ``configure``, then builds a
    pipeline of Daily transport, a ``UserIdleProcessor``, an OpenAI LLM and
    ElevenLabs text-to-speech. The idle processor is created with a
    5-second timeout and a callback that asks the LLM to check whether the
    user is still there. The LLM is also asked to introduce itself when the
    first participant joins. The aiohttp session is shared with the TTS
    service and stays open for the whole run.
    """
    async with aiohttp.ClientSession() as session:
        room_url, token = await configure(session)

        daily_params = DailyParams(
            audio_out_enabled=True,
            transcription_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
        )
        transport = DailyTransport(room_url, token, "Respond bot", daily_params)

        tts = ElevenLabsTTSService(
            aiohttp_session=session,
            api_key=os.getenv("ELEVENLABS_API_KEY"),
            voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
        )

        llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

        system_prompt = {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
        }
        # One list object is shared by the aggregators, the idle callback and
        # the join handler.
        messages = [system_prompt]

        user_aggregator = LLMUserResponseAggregator(messages)
        assistant_aggregator = LLMAssistantResponseAggregator(messages)

        async def user_idle_callback(user_idle: UserIdleProcessor):
            # Each invocation appends one more system message to the shared
            # history before re-sending it through the idle processor.
            nudge = {
                "role": "system",
                "content": "Ask the user if they are still there and try to prompt for some input, but be short.",
            }
            messages.append(nudge)
            await user_idle.queue_frame(LLMMessagesFrame(messages))

        user_idle = UserIdleProcessor(callback=user_idle_callback, timeout=5.0)

        stages = [
            transport.input(),     # Transport user input
            user_idle,             # Idle user check-in
            user_aggregator,       # User responses
            llm,                   # LLM
            tts,                   # TTS
            transport.output(),    # Transport bot output
            assistant_aggregator,  # Assistant spoken responses
        ]
        pipeline = Pipeline(stages)

        task_params = PipelineParams(
            allow_interruptions=True,
            enable_metrics=True,
            report_only_initial_ttfb=True,
        )
        task = PipelineTask(pipeline, task_params)

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            transport.capture_participant_transcription(participant["id"])
            # Kick off the conversation.
            intro_request = {
                "role": "system",
                "content": "Please introduce yourself to the user.",
            }
            messages.append(intro_request)
            await task.queue_frames([LLMMessagesFrame(messages)])

        await PipelineRunner().run(task)
# Script entry point: run main() to completion on a fresh asyncio event loop.
if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,78 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import aiohttp
import argparse
import sys
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.processors.gstreamer.pipeline_source import GStreamerPipelineSource
from pipecat.transports.services.daily import DailyParams, DailyTransport
from runner import configure_with_args
from loguru import logger
from dotenv import load_dotenv
# Load settings from a .env file; override=True lets those values replace
# variables that are already set in the process environment.
load_dotenv(override=True)

# Replace the logger's pre-configured handler (id 0) with a stderr sink that
# emits everything from DEBUG level upwards.
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
def _gst_quote(value: str) -> str:
    """Return *value* as a double-quoted GStreamer pipeline-description string.

    Backslashes and double quotes are backslash-escaped so the value stays a
    single token even when it contains whitespace.
    """
    escaped = value.replace("\\", "\\\\").replace('"', '\\"')
    return f'"{escaped}"'


async def main():
    """Stream a local media file into a Daily room through GStreamer.

    Parses the required ``-i/--input`` argument, obtains the room URL from
    ``configure_with_args``, and runs a two-stage pipeline: a
    ``GStreamerPipelineSource`` reading the file, followed by the Daily
    transport output (720p live camera plus live audio).
    """
    async with aiohttp.ClientSession() as session:
        parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
        parser.add_argument(
            "-i",
            "--input",
            type=str,
            required=True,
            help="Input video file")

        # The token returned by configure_with_args is not used; the transport
        # is created with None in its place.
        (room_url, _, args) = await configure_with_args(session, parser)

        transport = DailyTransport(
            room_url,
            None,
            "GStreamer",
            DailyParams(
                audio_out_enabled=True,
                audio_out_is_live=True,
                camera_out_enabled=True,
                camera_out_width=1280,
                camera_out_height=720,
                camera_out_is_live=True,
            )
        )

        gst = GStreamerPipelineSource(
            # Quote the path: an unquoted value containing whitespace would be
            # split into separate tokens of the pipeline description.
            pipeline=f"filesrc location={_gst_quote(args.input)}",
            out_params=GStreamerPipelineSource.OutputParams(
                video_width=1280,
                video_height=720,
                audio_sample_rate=16000,
                audio_channels=1,
            )
        )

        pipeline = Pipeline([
            gst,                 # GStreamer file source
            transport.output(),  # Transport bot output
        ])

        task = PipelineTask(pipeline)

        runner = PipelineRunner()

        await runner.run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,64 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import aiohttp
import sys
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.processors.gstreamer.pipeline_source import GStreamerPipelineSource
from pipecat.transports.services.daily import DailyParams, DailyTransport
from runner import configure
from loguru import logger
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main():
    """Send a GStreamer test pattern to a Daily room as camera video.

    Obtains the room URL from ``configure``, joins the room as "GStreamer"
    with live 1280x720 camera output, and runs a pipeline that feeds a
    ``videotestsrc`` GStreamer element into the transport output.
    """
    async with aiohttp.ClientSession() as session:
        room_url, _ = await configure(session)

        # The transport is created without a meeting token (second argument).
        daily_params = DailyParams(
            camera_out_enabled=True,
            camera_out_width=1280,
            camera_out_height=720,
            camera_out_is_live=True,
        )
        transport = DailyTransport(room_url, None, "GStreamer", daily_params)

        # The caps filter pins the test pattern to the same 1280x720 size the
        # transport camera is configured for.
        source_params = GStreamerPipelineSource.OutputParams(
            video_width=1280,
            video_height=720,
            clock_sync=False,
        )
        gst = GStreamerPipelineSource(
            pipeline="videotestsrc ! capsfilter caps=\"video/x-raw,width=1280,height=720,framerate=30/1\"",
            out_params=source_params,
        )

        stages = [
            gst,                 # GStreamer test video source
            transport.output(),  # Transport bot output
        ]
        task = PipelineTask(Pipeline(stages))

        await PipelineRunner().run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,12 +1,26 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import argparse
import os
import time
import urllib
import requests
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
def configure():
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
async def configure(aiohttp_session: aiohttp.ClientSession):
    """Return the Daily room URL and meeting token for a sample bot.

    Delegates to ``configure_with_args`` without a custom parser and drops
    the third element (the parsed arguments) of its result.

    Args:
        aiohttp_session: HTTP session forwarded to ``configure_with_args``.

    Returns:
        A ``(url, token)`` tuple.
    """
    room_url, meeting_token, _unused_args = await configure_with_args(aiohttp_session)
    return (room_url, meeting_token)
async def configure_with_args(
aiohttp_session: aiohttp.ClientSession,
parser: argparse.ArgumentParser | None = None):
if not parser:
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
parser.add_argument(
"-u",
"--url",
@@ -33,26 +47,15 @@ def configure():
if not key:
raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
daily_rest_helper = DailyRESTHelper(
daily_api_key=key,
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session)
# Create a meeting token for the given room with an expiration 1 hour in
# the future.
room_name: str = urllib.parse.urlparse(url).path[1:]
expiration: float = time.time() + 60 * 60
expiry_time: float = 60 * 60
res: requests.Response = requests.post(
f"https://api.daily.co/v1/meeting-tokens",
headers={
"Authorization": f"Bearer {key}"},
json={
"properties": {
"room_name": room_name,
"is_owner": True,
"exp": expiration}},
)
token = await daily_rest_helper.get_token(url, expiry_time)
if res.status_code != 200:
raise Exception(
f"Failed to create meeting token: {res.status_code} {res.text}")
token: str = res.json()["token"]
return (url, token)
return (url, token, args)

View File

@@ -1,3 +1,9 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import aiohttp
import os
@@ -128,8 +134,10 @@ class ImageFilterProcessor(FrameProcessor):
await self.push_frame(frame)
async def main(room_url: str, token):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
@@ -204,5 +212,4 @@ async def main(room_url: str, token):
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -1,5 +1,4 @@
python-dotenv
requests
fastapi[all]
uvicorn
pipecat-ai[daily,moondream,openai,silero]

View File

@@ -1,11 +1,17 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import argparse
import os
import time
import urllib
import requests
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
def configure():
async def configure(aiohttp_session: aiohttp.ClientSession):
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
parser.add_argument(
"-u",
@@ -33,26 +39,16 @@ def configure():
if not key:
raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
# Create a meeting token for the given room with an expiration 1 hour in
# the future.
room_name: str = urllib.parse.urlparse(url).path[1:]
expiration: float = time.time() + 60 * 60
res: requests.Response = requests.post(
f"https://api.daily.co/v1/meeting-tokens",
headers={
"Authorization": f"Bearer {key}"},
json={
"properties": {
"room_name": room_name,
"is_owner": True,
"exp": expiration}},
daily_rest_helper = DailyRESTHelper(
daily_api_key=key,
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session
)
if res.status_code != 200:
raise Exception(
f"Failed to create meeting token: {res.status_code} {res.text}")
# Create a meeting token for the given room with an expiration 1 hour in
# the future.
expiry_time: float = 60 * 60
token: str = res.json()["token"]
token = await daily_rest_helper.get_token(url, expiry_time)
return (url, token)

View File

@@ -1,31 +1,51 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import os
import argparse
import subprocess
import atexit
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, RedirectResponse
from utils.daily_helpers import create_room as _create_room, get_token
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
MAX_BOTS_PER_ROOM = 1
# Bot sub-process dict for status reporting and concurrency control
bot_procs = {}
daily_helpers = {}
def cleanup():
# Clean up function, just to be extra safe
for proc in bot_procs.values():
for entry in bot_procs.values():
proc = entry[0]
proc.terminate()
proc.wait()
atexit.register(cleanup)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage the shared Daily REST helper for the lifetime of the app.

    On entry, opens an aiohttp session and stores a ``DailyRESTHelper`` built
    from ``DAILY_API_KEY`` / ``DAILY_API_URL`` under ``daily_helpers["rest"]``.
    On exit, closes the session and terminates any bot sub-processes via
    ``cleanup()``.

    Args:
        app: The FastAPI application this context manager is attached to.
    """
    aiohttp_session = aiohttp.ClientSession()
    try:
        daily_helpers["rest"] = DailyRESTHelper(
            daily_api_key=os.getenv("DAILY_API_KEY", ""),
            daily_api_url=os.getenv("DAILY_API_URL", 'https://api.daily.co/v1'),
            aiohttp_session=aiohttp_session
        )
        yield
    finally:
        # Run teardown even when an exception is thrown into the generator at
        # the yield; otherwise the session and bot processes would leak.
        await aiohttp_session.close()
        cleanup()
app = FastAPI()
app = FastAPI(lifespan=lifespan)
app.add_middleware(
CORSMiddleware,
@@ -39,45 +59,45 @@ app.add_middleware(
@app.get("/start")
async def start_agent(request: Request):
print(f"!!! Creating room")
room_url, room_name = _create_room()
print(f"!!! Room URL: {room_url}")
room = await daily_helpers["rest"].create_room(DailyRoomParams())
print(f"!!! Room URL: {room.url}")
# Ensure the room property is present
if not room_url:
if not room.url:
raise HTTPException(
status_code=500,
detail="Missing 'room' property in request data. Cannot start agent without a target room!")
# Check if there is already an existing process running in this room
num_bots_in_room = sum(
1 for proc in bot_procs.values() if proc[1] == room_url and proc[0].poll() is None)
1 for proc in bot_procs.values() if proc[1] == room.url and proc[0].poll() is None)
if num_bots_in_room >= MAX_BOTS_PER_ROOM:
raise HTTPException(
status_code=500, detail=f"Max bot limited reach for room: {room_url}")
status_code=500, detail=f"Max bot limited reach for room: {room.url}")
# Get the token for the room
token = get_token(room_url)
token = await daily_helpers["rest"].get_token(room.url)
if not token:
raise HTTPException(
status_code=500, detail=f"Failed to get token for room: {room_url}")
status_code=500, detail=f"Failed to get token for room: {room.url}")
# Spawn a new agent, and join the user session
# Note: this is mostly for demonstration purposes (refer to 'deployment' in README)
try:
proc = subprocess.Popen(
[
f"python3 -m bot -u {room_url} -t {token}"
f"python3 -m bot -u {room.url} -t {token}"
],
shell=True,
bufsize=1,
cwd=os.path.dirname(os.path.abspath(__file__))
)
bot_procs[proc.pid] = (proc, room_url)
bot_procs[proc.pid] = (proc, room.url)
except Exception as e:
raise HTTPException(
status_code=500, detail=f"Failed to start subprocess: {e}")
return RedirectResponse(room_url)
return RedirectResponse(room.url)
@app.get("/status/{pid}")

View File

@@ -1,109 +0,0 @@
import urllib.parse
import os
import time
import urllib
import requests
from dotenv import load_dotenv
load_dotenv()
daily_api_path = os.getenv("DAILY_API_URL") or "api.daily.co/v1"
daily_api_key = os.getenv("DAILY_API_KEY")
def create_room() -> tuple[str, str]:
    """
    Helper function to create a Daily room.

    See: https://docs.daily.co/reference/rest-api/rooms

    Returns:
        tuple: A tuple containing the room URL and room name.

    Raises:
        Exception: If the request to create the room fails or if the response
            does not contain the room URL or room name.
        requests.exceptions.RequestException: If the HTTP request cannot be
            completed, including when it times out.
    """
    room_props = {
        "exp": time.time() + 60 * 60,  # 1 hour
        "enable_chat": True,
        "enable_emoji_reactions": True,
        "eject_at_room_exp": True,
        "enable_prejoin_ui": False,  # Important for the bot to be able to join headlessly
    }
    res = requests.post(
        f"https://{daily_api_path}/rooms",
        headers={"Authorization": f"Bearer {daily_api_key}"},
        json={
            "properties": room_props
        },
        # requests waits indefinitely by default; bound the wait so a stalled
        # connection cannot hang the caller.
        timeout=10,
    )
    if res.status_code != 200:
        raise Exception(f"Unable to create room: {res.text}")

    data = res.json()
    room_url: str = data.get("url")
    room_name: str = data.get("name")
    if room_url is None or room_name is None:
        raise Exception("Missing room URL or room name in response")

    return room_url, room_name
def get_name_from_url(room_url: str) -> str:
    """
    Extracts the name from a given room URL.

    The name is the URL path with its surrounding slashes removed, so a
    trailing slash (``https://host/room/``) does not end up in the name.
    Query strings and fragments are not part of the path and are ignored.

    Args:
        room_url (str): The URL of the room.

    Returns:
        str: The extracted name from the room URL (empty if the URL has no path).
    """
    return urllib.parse.urlparse(room_url).path.strip("/")
def get_token(room_url: str) -> str:
    """
    Retrieves a meeting token for the specified Daily room URL.

    See: https://docs.daily.co/reference/rest-api/meeting-tokens

    The token is requested as an owner token that expires one hour from now.

    Args:
        room_url (str): The URL of the Daily room.

    Returns:
        str: The meeting token.

    Raises:
        Exception: If no room URL is specified or if no Daily API key is specified.
        Exception: If there is an error creating the meeting token.
        requests.exceptions.RequestException: If the HTTP request cannot be
            completed, including when it times out.
    """
    if not room_url:
        raise Exception(
            "No Daily room specified. You must specify a Daily room in order a token to be generated.")

    if not daily_api_key:
        raise Exception(
            "No Daily API key specified. set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")

    expiration: float = time.time() + 60 * 60
    room_name = get_name_from_url(room_url)

    res: requests.Response = requests.post(
        f"https://{daily_api_path}/meeting-tokens",
        headers={
            "Authorization": f"Bearer {daily_api_key}"},
        json={
            "properties": {
                "room_name": room_name,
                "is_owner": True,  # Owner tokens required for transcription
                "exp": expiration}},
        # requests waits indefinitely by default; bound the wait so a stalled
        # connection cannot hang the caller.
        timeout=10,
    )

    if res.status_code != 200:
        raise Exception(
            f"Failed to create meeting token: {res.status_code} {res.text}")

    token: str = res.json()["token"]
    return token

View File

@@ -257,17 +257,16 @@ class IntakeProcessor:
return None
async def main(room_url: str, token):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
"Chatbot",
DailyParams(
audio_out_enabled=True,
camera_out_enabled=True,
camera_out_width=1024,
camera_out_height=576,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
transcription_enabled=True,
@@ -351,5 +350,4 @@ async def main(room_url: str, token):
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -1,5 +1,4 @@
python-dotenv
requests
fastapi[all]
uvicorn
pipecat-ai[daily,openai,silero]

View File

@@ -1,11 +1,17 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import argparse
import os
import time
import urllib
import requests
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
def configure():
async def configure(aiohttp_session: aiohttp.ClientSession):
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
parser.add_argument(
"-u",
@@ -33,26 +39,15 @@ def configure():
if not key:
raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
daily_rest_helper = DailyRESTHelper(
daily_api_key=key,
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session)
# Create a meeting token for the given room with an expiration 1 hour in
# the future.
room_name: str = urllib.parse.urlparse(url).path[1:]
expiration: float = time.time() + 60 * 60
expiry_time: float = 60 * 60
res: requests.Response = requests.post(
f"https://api.daily.co/v1/meeting-tokens",
headers={
"Authorization": f"Bearer {key}"},
json={
"properties": {
"room_name": room_name,
"is_owner": True,
"exp": expiration}},
)
if res.status_code != 200:
raise Exception(
f"Failed to create meeting token: {res.status_code} {res.text}")
token: str = res.json()["token"]
token = await daily_rest_helper.get_token(url, expiry_time)
return (url, token)

View File

@@ -1,31 +1,51 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import os
import argparse
import subprocess
import atexit
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, RedirectResponse
from utils.daily_helpers import create_room as _create_room, get_token
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
MAX_BOTS_PER_ROOM = 1
# Bot sub-process dict for status reporting and concurrency control
bot_procs = {}
daily_helpers = {}
def cleanup():
# Clean up function, just to be extra safe
for proc in bot_procs.values():
for entry in bot_procs.values():
proc = entry[0]
proc.terminate()
proc.wait()
atexit.register(cleanup)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage the shared Daily REST helper for the lifetime of the app.

    On entry, opens an aiohttp session and stores a ``DailyRESTHelper`` built
    from ``DAILY_API_KEY`` / ``DAILY_API_URL`` under ``daily_helpers["rest"]``.
    On exit, closes the session and terminates any bot sub-processes via
    ``cleanup()``.

    Args:
        app: The FastAPI application this context manager is attached to.
    """
    aiohttp_session = aiohttp.ClientSession()
    try:
        daily_helpers["rest"] = DailyRESTHelper(
            daily_api_key=os.getenv("DAILY_API_KEY", ""),
            daily_api_url=os.getenv("DAILY_API_URL", 'https://api.daily.co/v1'),
            aiohttp_session=aiohttp_session
        )
        yield
    finally:
        # Run teardown even when an exception is thrown into the generator at
        # the yield; otherwise the session and bot processes would leak.
        await aiohttp_session.close()
        cleanup()
app = FastAPI()
app = FastAPI(lifespan=lifespan)
app.add_middleware(
CORSMiddleware,
@@ -39,45 +59,45 @@ app.add_middleware(
@app.get("/start")
async def start_agent(request: Request):
print(f"!!! Creating room")
room_url, room_name = _create_room()
print(f"!!! Room URL: {room_url}")
room = await daily_helpers["rest"].create_room(DailyRoomParams())
print(f"!!! Room URL: {room.url}")
# Ensure the room property is present
if not room_url:
if not room.url:
raise HTTPException(
status_code=500,
detail="Missing 'room' property in request data. Cannot start agent without a target room!")
# Check if there is already an existing process running in this room
num_bots_in_room = sum(
1 for proc in bot_procs.values() if proc[1] == room_url and proc[0].poll() is None)
1 for proc in bot_procs.values() if proc[1] == room.url and proc[0].poll() is None)
if num_bots_in_room >= MAX_BOTS_PER_ROOM:
raise HTTPException(
status_code=500, detail=f"Max bot limited reach for room: {room_url}")
status_code=500, detail=f"Max bot limited reach for room: {room.url}")
# Get the token for the room
token = get_token(room_url)
token = await daily_helpers["rest"].get_token(room.url)
if not token:
raise HTTPException(
status_code=500, detail=f"Failed to get token for room: {room_url}")
status_code=500, detail=f"Failed to get token for room: {room.url}")
# Spawn a new agent, and join the user session
# Note: this is mostly for demonstration purposes (refer to 'deployment' in README)
try:
proc = subprocess.Popen(
[
f"python3 -m bot -u {room_url} -t {token}"
f"python3 -m bot -u {room.url} -t {token}"
],
shell=True,
bufsize=1,
cwd=os.path.dirname(os.path.abspath(__file__))
)
bot_procs[proc.pid] = (proc, room_url)
bot_procs[proc.pid] = (proc, room.url)
except Exception as e:
raise HTTPException(
status_code=500, detail=f"Failed to start subprocess: {e}")
return RedirectResponse(room_url)
return RedirectResponse(room.url)
@app.get("/status/{pid}")

View File

@@ -1,109 +0,0 @@
import urllib.parse
import os
import time
import urllib
import requests
from dotenv import load_dotenv
load_dotenv()
daily_api_path = os.getenv("DAILY_API_URL") or "api.daily.co/v1"
daily_api_key = os.getenv("DAILY_API_KEY")
def create_room() -> tuple[str, str]:
    """
    Helper function to create a Daily room.

    See: https://docs.daily.co/reference/rest-api/rooms

    Returns:
        tuple: A tuple containing the room URL and room name.

    Raises:
        Exception: If the request to create the room fails or if the response
            does not contain the room URL or room name.
        requests.exceptions.RequestException: If the HTTP request cannot be
            completed, including when it times out.
    """
    room_props = {
        "exp": time.time() + 60 * 60,  # 1 hour
        "enable_chat": True,
        "enable_emoji_reactions": True,
        "eject_at_room_exp": True,
        "enable_prejoin_ui": False,  # Important for the bot to be able to join headlessly
    }
    res = requests.post(
        f"https://{daily_api_path}/rooms",
        headers={"Authorization": f"Bearer {daily_api_key}"},
        json={
            "properties": room_props
        },
        # requests waits indefinitely by default; bound the wait so a stalled
        # connection cannot hang the caller.
        timeout=10,
    )
    if res.status_code != 200:
        raise Exception(f"Unable to create room: {res.text}")

    data = res.json()
    room_url: str = data.get("url")
    room_name: str = data.get("name")
    if room_url is None or room_name is None:
        raise Exception("Missing room URL or room name in response")

    return room_url, room_name
def get_name_from_url(room_url: str) -> str:
    """
    Extracts the name from a given room URL.

    The name is the URL path with its surrounding slashes removed, so a
    trailing slash (``https://host/room/``) does not end up in the name.
    Query strings and fragments are not part of the path and are ignored.

    Args:
        room_url (str): The URL of the room.

    Returns:
        str: The extracted name from the room URL (empty if the URL has no path).
    """
    return urllib.parse.urlparse(room_url).path.strip("/")
def get_token(room_url: str) -> str:
    """
    Retrieves a meeting token for the specified Daily room URL.

    See: https://docs.daily.co/reference/rest-api/meeting-tokens

    The token is requested as an owner token that expires one hour from now.

    Args:
        room_url (str): The URL of the Daily room.

    Returns:
        str: The meeting token.

    Raises:
        Exception: If no room URL is specified or if no Daily API key is specified.
        Exception: If there is an error creating the meeting token.
        requests.exceptions.RequestException: If the HTTP request cannot be
            completed, including when it times out.
    """
    if not room_url:
        raise Exception(
            "No Daily room specified. You must specify a Daily room in order a token to be generated.")

    if not daily_api_key:
        raise Exception(
            "No Daily API key specified. set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")

    expiration: float = time.time() + 60 * 60
    room_name = get_name_from_url(room_url)

    res: requests.Response = requests.post(
        f"https://{daily_api_path}/meeting-tokens",
        headers={
            "Authorization": f"Bearer {daily_api_key}"},
        json={
            "properties": {
                "room_name": room_name,
                "is_owner": True,  # Owner tokens required for transcription
                "exp": expiration}},
        # requests waits indefinitely by default; bound the wait so a stalled
        # connection cannot hang the caller.
        timeout=10,
    )

    if res.status_code != 200:
        raise Exception(
            f"Failed to create meeting token: {res.status_code} {res.text}")

    token: str = res.json()["token"]
    return token

View File

@@ -1,3 +1,9 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import aiohttp
import os
@@ -77,8 +83,10 @@ class TalkingAnimation(FrameProcessor):
await self.push_frame(frame)
async def main(room_url: str, token):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
@@ -165,5 +173,4 @@ async def main(room_url: str, token):
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -1,5 +1,4 @@
python-dotenv
requests
fastapi[all]
uvicorn
pipecat-ai[daily,openai,silero]

View File

@@ -1,11 +1,17 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import argparse
import os
import time
import urllib
import requests
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
def configure():
async def configure(aiohttp_session: aiohttp.ClientSession):
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
parser.add_argument(
"-u",
@@ -33,26 +39,16 @@ def configure():
if not key:
raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
# Create a meeting token for the given room with an expiration 1 hour in
# the future.
room_name: str = urllib.parse.urlparse(url).path[1:]
expiration: float = time.time() + 60 * 60
res: requests.Response = requests.post(
f"https://api.daily.co/v1/meeting-tokens",
headers={
"Authorization": f"Bearer {key}"},
json={
"properties": {
"room_name": room_name,
"is_owner": True,
"exp": expiration}},
daily_rest_helper = DailyRESTHelper(
daily_api_key=key,
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session
)
if res.status_code != 200:
raise Exception(
f"Failed to create meeting token: {res.status_code} {res.text}")
# Create a meeting token for the given room with an expiration 1 hour in
# the future.
expiry_time: float = 60 * 60
token: str = res.json()["token"]
token = await daily_rest_helper.get_token(url, expiry_time)
return (url, token)

View File

@@ -1,31 +1,51 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import os
import argparse
import subprocess
import atexit
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, RedirectResponse
from utils.daily_helpers import create_room as _create_room, get_token
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
MAX_BOTS_PER_ROOM = 1
# Bot sub-process dict for status reporting and concurrency control
bot_procs = {}
daily_helpers = {}
def cleanup():
# Clean up function, just to be extra safe
for proc in bot_procs.values():
for entry in bot_procs.values():
proc = entry[0]
proc.terminate()
proc.wait()
atexit.register(cleanup)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage the shared Daily REST helper for the lifetime of the app.

    On entry, opens an aiohttp session and stores a ``DailyRESTHelper`` built
    from ``DAILY_API_KEY`` / ``DAILY_API_URL`` under ``daily_helpers["rest"]``.
    On exit, closes the session and terminates any bot sub-processes via
    ``cleanup()``.

    Args:
        app: The FastAPI application this context manager is attached to.
    """
    aiohttp_session = aiohttp.ClientSession()
    try:
        daily_helpers["rest"] = DailyRESTHelper(
            daily_api_key=os.getenv("DAILY_API_KEY", ""),
            daily_api_url=os.getenv("DAILY_API_URL", 'https://api.daily.co/v1'),
            aiohttp_session=aiohttp_session
        )
        yield
    finally:
        # Run teardown even when an exception is thrown into the generator at
        # the yield; otherwise the session and bot processes would leak.
        await aiohttp_session.close()
        cleanup()
app = FastAPI()
app = FastAPI(lifespan=lifespan)
app.add_middleware(
CORSMiddleware,
@@ -39,45 +59,45 @@ app.add_middleware(
@app.get("/start")
async def start_agent(request: Request):
print(f"!!! Creating room")
room_url, room_name = _create_room()
print(f"!!! Room URL: {room_url}")
room = await daily_helpers["rest"].create_room(DailyRoomParams())
print(f"!!! Room URL: {room.url}")
# Ensure the room property is present
if not room_url:
if not room.url:
raise HTTPException(
status_code=500,
detail="Missing 'room' property in request data. Cannot start agent without a target room!")
# Check if there is already an existing process running in this room
num_bots_in_room = sum(
1 for proc in bot_procs.values() if proc[1] == room_url and proc[0].poll() is None)
1 for proc in bot_procs.values() if proc[1] == room.url and proc[0].poll() is None)
if num_bots_in_room >= MAX_BOTS_PER_ROOM:
raise HTTPException(
status_code=500, detail=f"Max bot limited reach for room: {room_url}")
status_code=500, detail=f"Max bot limited reach for room: {room.url}")
# Get the token for the room
token = get_token(room_url)
token = await daily_helpers["rest"].get_token(room.url)
if not token:
raise HTTPException(
status_code=500, detail=f"Failed to get token for room: {room_url}")
status_code=500, detail=f"Failed to get token for room: {room.url}")
# Spawn a new agent, and join the user session
# Note: this is mostly for demonstration purposes (refer to 'deployment' in README)
try:
proc = subprocess.Popen(
[
f"python3 -m bot -u {room_url} -t {token}"
f"python3 -m bot -u {room.url} -t {token}"
],
shell=True,
bufsize=1,
cwd=os.path.dirname(os.path.abspath(__file__))
)
bot_procs[proc.pid] = (proc, room_url)
bot_procs[proc.pid] = (proc, room.url)
except Exception as e:
raise HTTPException(
status_code=500, detail=f"Failed to start subprocess: {e}")
return RedirectResponse(room_url)
return RedirectResponse(room.url)
@app.get("/status/{pid}")

View File

@@ -1,109 +0,0 @@
import urllib.parse
import os
import time
import urllib
import requests
from dotenv import load_dotenv
load_dotenv()
daily_api_path = os.getenv("DAILY_API_URL") or "api.daily.co/v1"
daily_api_key = os.getenv("DAILY_API_KEY")
def create_room() -> tuple[str, str]:
    """
    Helper function to create a Daily room.

    See: https://docs.daily.co/reference/rest-api/rooms

    Returns:
        tuple: A tuple containing the room URL and room name.

    Raises:
        Exception: If the request to create the room fails or if the response
            does not contain the room URL or room name.
        requests.exceptions.RequestException: If the HTTP request cannot be
            completed, including when it times out.
    """
    room_props = {
        "exp": time.time() + 60 * 60,  # 1 hour
        "enable_chat": True,
        "enable_emoji_reactions": True,
        "eject_at_room_exp": True,
        "enable_prejoin_ui": False,  # Important for the bot to be able to join headlessly
    }
    res = requests.post(
        f"https://{daily_api_path}/rooms",
        headers={"Authorization": f"Bearer {daily_api_key}"},
        json={
            "properties": room_props
        },
        # requests waits indefinitely by default; bound the wait so a stalled
        # connection cannot hang the caller.
        timeout=10,
    )
    if res.status_code != 200:
        raise Exception(f"Unable to create room: {res.text}")

    data = res.json()
    room_url: str = data.get("url")
    room_name: str = data.get("name")
    if room_url is None or room_name is None:
        raise Exception("Missing room URL or room name in response")

    return room_url, room_name
def get_name_from_url(room_url: str) -> str:
    """
    Extracts the name from a given room URL.

    The name is the URL path with its surrounding slashes removed, so a
    trailing slash (``https://host/room/``) does not end up in the name.
    Query strings and fragments are not part of the path and are ignored.

    Args:
        room_url (str): The URL of the room.

    Returns:
        str: The extracted name from the room URL (empty if the URL has no path).
    """
    return urllib.parse.urlparse(room_url).path.strip("/")
def get_token(room_url: str) -> str:
    """
    Retrieves a meeting token for the specified Daily room URL.

    See: https://docs.daily.co/reference/rest-api/meeting-tokens

    The token is requested as an owner token that expires one hour from now.

    Args:
        room_url (str): The URL of the Daily room.

    Returns:
        str: The meeting token.

    Raises:
        Exception: If no room URL is specified or if no Daily API key is specified.
        Exception: If there is an error creating the meeting token.
        requests.exceptions.RequestException: If the HTTP request cannot be
            completed, including when it times out.
    """
    if not room_url:
        raise Exception(
            "No Daily room specified. You must specify a Daily room in order a token to be generated.")

    if not daily_api_key:
        raise Exception(
            "No Daily API key specified. set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")

    expiration: float = time.time() + 60 * 60
    room_name = get_name_from_url(room_url)

    res: requests.Response = requests.post(
        f"https://{daily_api_path}/meeting-tokens",
        headers={
            "Authorization": f"Bearer {daily_api_key}"},
        json={
            "properties": {
                "room_name": room_name,
                "is_owner": True,  # Owner tokens required for transcription
                "exp": expiration}},
        # requests waits indefinitely by default; bound the wait so a stalled
        # connection cannot hang the caller.
        timeout=10,
    )

    if res.status_code != 200:
        raise Exception(
            f"Failed to create meeting token: {res.status_code} {res.text}")

    token: str = res.json()["token"]
    return token

View File

@@ -1,4 +1,4 @@
FROM python:3.11-bullseye
FROM python:3.11-slim-bookworm
ARG DEBIAN_FRONTEND=noninteractive
ARG USE_PERSISTENT_DATA
@@ -51,4 +51,4 @@ COPY --chown=user ./frontend/ frontend/
RUN cd frontend && npm install && npm run build
# Start the FastAPI server
CMD python3 src/server.py --port ${FAST_API_PORT}
CMD python3 src/bot_runner.py --port ${FAST_API_PORT}

View File

@@ -48,6 +48,8 @@ pip install -r requirements.txt
mv env.example .env
```
When deploying to production, to ensure only this app can spawn a new bot, set your `ENV` to `production`
**Build the frontend:**
This project uses a custom frontend, which needs to be built. Note: this is done automatically as part of the Docker deployment.
@@ -64,11 +66,11 @@ The build UI files can be found in `frontend/out`
Start the API / bot manager:
`python src/server.py`
`python src/bot_runner.py`
If you'd like to run a custom domain or port:
`python src/server.py --host somehost --p 7777`
`python src/bot_runner.py --host somehost --p someport`
➡️ Open the host URL in your browser `http://localhost:7860`

View File

@@ -1,5 +1,9 @@
DAILY_API_KEY=7df...
ELEVENLABS_API_KEY=aeb...
ELEVENLABS_VOICE_ID=7S...
FAL_KEY=8c...
OPENAI_API_KEY=sk-PL...
DAILY_API_KEY=
DAILY_SAMPLE_ROOM_URL=
ELEVENLABS_API_KEY=
ELEVENLABS_VOICE_ID=
FAL_KEY=
OPENAI_API_KEY=
ENV= # dev | production
RUN_AS_VM= # Set this if you want to run bots on process (not launch a new VM)

View File

@@ -27,14 +27,11 @@ export default function Call() {
// Create a new room for the story session
try {
const response = await fetch("/create", {
const response = await fetch("/start_bot", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
room_url: process.env.NEXT_PUBLIC_ROOM_URL || null,
}),
});
const { room_url, token } = await response.json();
@@ -55,21 +52,9 @@ export default function Call() {
// Disable local audio, the bot will say hello first
daily.setLocalAudio(false);
// Start the bot
const resp = await fetch("/start", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
room_url,
}),
});
setState("started");
} catch (error) {
setState("error");
leave();
}
}
@@ -79,7 +64,13 @@ export default function Call() {
}
if (state === "error") {
return <div>An Error occured</div>;
return (
<div className="flex items-center mx-auto">
<p className="text-red-500 font-semibold bg-white px-4 py-2 shadow-xl rounded-lg">
This demo is currently at capacity. Please try again later.
</p>
</div>
);
}
if (state === "started") {

View File

@@ -108,26 +108,26 @@ export default function DevicePicker({}: Props) {
{hasMicError && (
<div className="error">
{micState === "blocked" ? (
<p>
<p className="text-red-500">
Please check your browser and system permissions. Make sure that
this app is allowed to access your microphone.
</p>
) : micState === "in-use" ? (
<p>
<p className="text-red-500">
Your microphone is being used by another app. Please close any
other apps using your microphone and restart this app.
</p>
) : micState === "not-found" ? (
<p>
<p className="text-red-500">
No microphone seems to be connected. Please connect a microphone.
</p>
) : micState === "not-supported" ? (
<p>
<p className="text-red-500">
This app is not supported on your device. Please update your
software or use a different device.
</p>
) : (
<p>
<p className="text-red-500">
There seems to be an issue accessing your microphone. Try
restarting the app or consult a system administrator.
</p>

View File

@@ -1,7 +1,7 @@
import React from "react";
import { Button } from "@/components/ui/button";
import DevicePicker from "@/components/DevicePicker";
import { IconEar, IconLoader2 } from "@tabler/icons-react";
import { IconAlertCircle, IconEar, IconLoader2 } from "@tabler/icons-react";
type SetupProps = {
handleStart: () => void;
@@ -24,7 +24,6 @@ export const Setup: React.FC<SetupProps> = ({ handleStart }) => {
<h1 className="text-4xl font-bold text-pretty tracking-tighter mb-4">
Welcome to <span className="text-sky-500">Storytime</span>
</h1>
{state === "intro" ? (
<>
<p className="text-gray-600 leading-relaxed text-pretty">
@@ -38,6 +37,9 @@ export const Setup: React.FC<SetupProps> = ({ handleStart }) => {
<IconEar size={24} /> For best results, try in a quiet
environment!
</p>
<p className="flex flex-row gap-2 text-gray-600 font-medium text-red-500">
<IconAlertCircle size={24} /> This demo expires after 5 minutes.
</p>
</>
) : (
<>
@@ -49,7 +51,6 @@ export const Setup: React.FC<SetupProps> = ({ handleStart }) => {
<DevicePicker />
</>
)}
<hr className="border-gray-150 my-2" />
<Button

View File

@@ -1,2 +1 @@
NEXT_PUBLIC_ROOM_URL=
SITE_URL=

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,5 @@
async_timeout
fastapi
uvicorn
requests
python-dotenv
pipecat-ai[daily,openai,fal]

View File

@@ -5,7 +5,7 @@ import os
import sys
from pipecat.frames.frames import LLMMessagesFrame, StopTaskFrame
from pipecat.frames.frames import LLMMessagesFrame, StopTaskFrame, EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
@@ -139,6 +139,16 @@ async def main(room_url, token=None):
main_task = PipelineTask(main_pipeline)
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
    """Stop both pipeline tasks when a participant leaves the call.

    Args:
        transport: The transport that raised the event.
        participant: The participant that left.
        reason: The reason reported for the departure.
    """
    # queue_frame is awaited everywhere else in this file; without the await
    # the coroutine is created but never run, so intro_task never gets its
    # EndFrame.
    await intro_task.queue_frame(EndFrame())
    await main_task.queue_frame(EndFrame())
@transport.event_handler("on_call_state_updated")
async def on_call_state_updated(transport, state):
    """End the main pipeline task once the call state becomes "left"."""
    if state != "left":
        return
    await main_task.queue_frame(EndFrame())
await runner.run(main_task)
if __name__ == "__main__":

View File

@@ -0,0 +1,251 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import argparse
import subprocess
import os
from pathlib import Path
from typing import Optional
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse, JSONResponse
from pipecat.transports.services.helpers.daily_rest import (
DailyRESTHelper, DailyRoomObject, DailyRoomProperties, DailyRoomParams)
from dotenv import load_dotenv
load_dotenv(override=True)
# ------------ Fast API Config ------------ #
# Expiry passed to get_token() for both the bot token and the user token.
MAX_SESSION_TIME = 5 * 60 # 5 minutes
# Registry of shared helpers; lifespan() stores the DailyRESTHelper under "rest".
daily_helpers = {}
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Provide the shared Daily REST helper for the lifetime of the app.

    An aiohttp session is opened on startup and a DailyRESTHelper built on it
    is stored in ``daily_helpers["rest"]`` for the route handlers to use.

    Args:
        app: The FastAPI application being started (unused here).
    """
    # `async with` guarantees the session is closed even when the lifespan is
    # left through an exception or cancellation, not only on a clean shutdown.
    async with aiohttp.ClientSession() as aiohttp_session:
        daily_helpers["rest"] = DailyRESTHelper(
            daily_api_key=os.getenv("DAILY_API_KEY", ""),
            daily_api_url=os.getenv("DAILY_API_URL", 'https://api.daily.co/v1'),
            aiohttp_session=aiohttp_session
        )
        yield
# Application instance; lifespan() sets up and tears down the shared helpers.
app = FastAPI(lifespan=lifespan)
# NOTE(review): CORS is fully open here (any origin, method and header, with
# credentials allowed) — confirm this is intended outside of a demo.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Mount the static directory
# STATIC_DIR is relative to the process working directory.
STATIC_DIR = "frontend/out"
# ------------ Fast API Routes ------------ #
app.mount("/static", StaticFiles(directory=STATIC_DIR, html=True), name="static")
@app.post("/start_bot")
async def start_bot(request: Request) -> JSONResponse:
    """Provision a room, launch a bot into it and return the user's join details.

    The room is taken from ``DAILY_SAMPLE_ROOM_URL`` when set, otherwise a new
    one is created. The bot is started either through ``virtualize_bot`` (when
    ``RUN_AS_VM`` is set) or as a local sub-process.

    Args:
        request: The incoming request. Its JSON body is optional; a body
            containing a ``"test"`` key is answered with ``{"test": true}``.

    Returns:
        JSON with ``room_url`` and a ``token`` the user can join with.

    Raises:
        HTTPException: 403 when running in production and the Host header is
            not an allowed domain; 500 when the room, token or bot cannot be
            set up.
    """
    if os.getenv("ENV", "dev") == "production":
        # Only allow requests from the specified domain
        host_header = request.headers.get("host")
        allowed_domains = ["storytelling-chatbot.fly.dev", "www.storytelling-chatbot.fly.dev"]
        # Check if the Host header matches the allowed domain
        if host_header not in allowed_domains:
            raise HTTPException(status_code=403, detail="Access denied")

    try:
        data = await request.json()
        # Is this a webhook creation request?
        if "test" in data:
            return JSONResponse({"test": True})
    except Exception:
        # Deliberate best effort: the body is optional, so a missing or
        # malformed payload simply means this is a regular start request.
        pass

    # Use specified room URL, or create a new one if not specified
    room_url = os.getenv("DAILY_SAMPLE_ROOM_URL", "")
    if not room_url:
        params = DailyRoomParams(
            properties=DailyRoomProperties()
        )
        try:
            room: DailyRoomObject = await daily_helpers["rest"].create_room(params=params)
        except Exception as e:
            raise HTTPException(
                status_code=500,
                detail=f"Unable to provision room {e}") from e
    else:
        # Check passed room URL exists, we should assume that it already has a sip set up
        try:
            room = await daily_helpers["rest"].get_room_from_url(room_url)
        except Exception as e:
            raise HTTPException(
                status_code=500, detail=f"Room not found: {room_url}") from e

    # Validate the room before dereferencing it; the original only checked it
    # after room.url had already been used.
    if not room:
        raise HTTPException(
            status_code=500, detail=f"Failed to get token for room: {room_url}")

    # Give the agent a token to join the session
    token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)
    if not token:
        # room_url is empty when the room was just created, so report room.url.
        raise HTTPException(
            status_code=500, detail=f"Failed to get token for room: {room.url}")

    # Launch a new VM, or run as a shell process (not recommended)
    # NOTE: any non-empty RUN_AS_VM value (including "0" or "false") enables VM mode.
    if os.getenv("RUN_AS_VM", False):
        try:
            await virtualize_bot(room.url, token)
        except Exception as e:
            raise HTTPException(
                status_code=500, detail=f"Failed to spawn VM: {e}") from e
    else:
        try:
            # Argument list instead of a shell string: the URL and token are
            # passed verbatim and are never interpreted by a shell.
            subprocess.Popen(
                ["python3", "-m", "bot", "-u", room.url, "-t", token],
                bufsize=1,
                cwd=os.path.dirname(os.path.abspath(__file__)))
        except Exception as e:
            raise HTTPException(
                status_code=500, detail=f"Failed to start subprocess: {e}") from e

    # Grab a token for the user to join with
    user_token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)

    return JSONResponse({
        "room_url": room.url,
        "token": user_token,
    })
@app.get("/{path_name:path}", response_class=FileResponse)
async def catch_all(path_name: Optional[str] = ""):
    """Serve the exported frontend for any path no other route handles.

    Args:
        path_name: The request path. Empty serves ``index.html``; otherwise
            the matching file, or the same path with an ``.html`` suffix.

    Returns:
        A FileResponse for the resolved file.

    Raises:
        HTTPException: 450 when nothing matches or the path points outside
            the static directory.
    """
    if path_name == "":
        return FileResponse(f"{STATIC_DIR}/index.html")

    static_root = Path(STATIC_DIR).resolve()
    file_path = (static_root / (path_name or "")).resolve()

    # path_name comes straight from the URL. An absolute path or ".." segments
    # would otherwise let a request read files outside the static directory.
    if static_root not in file_path.parents:
        raise HTTPException(status_code=450, detail="Incorrect API call")

    if file_path.is_file():
        return FileResponse(file_path)

    html_file_path = file_path.with_suffix(".html")
    if html_file_path.is_file():
        return FileResponse(html_file_path)

    raise HTTPException(status_code=450, detail="Incorrect API call")
# ------------ Virtualization ------------ #
async def virtualize_bot(room_url: str, token: str):
    """
    This is an example of how to virtualize the bot using Fly.io
    You can adapt this method to use whichever cloud provider you prefer.

    Looks up the image of an existing machine in the Fly app, starts a new
    machine from it running the bot, and waits for it to reach "started".

    Args:
        room_url: Room URL handed to the bot via ``-u``.
        token: Meeting token handed to the bot via ``-t``.

    Raises:
        Exception: If any Fly API call returns a non-200 status, or the app
            has no machine to copy the image from.
    """
    FLY_API_HOST = os.getenv("FLY_API_HOST", "https://api.machines.dev/v1")
    FLY_APP_NAME = os.getenv("FLY_APP_NAME", "storytelling-chatbot")
    FLY_API_KEY = os.getenv("FLY_API_KEY", "")
    FLY_HEADERS = {
        'Authorization': f"Bearer {FLY_API_KEY}",
        'Content-Type': 'application/json'
    }
    machines_url = f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines"

    async with aiohttp.ClientSession() as session:
        # Use the same image as the bot runner
        async with session.get(machines_url, headers=FLY_HEADERS) as r:
            if r.status != 200:
                text = await r.text()
                raise Exception(f"Unable to get machine info from Fly: {text}")
            data = await r.json()
            # An empty list would otherwise surface as a bare IndexError.
            if not data:
                raise Exception(
                    f"Unable to get machine info from Fly: no machines found for app {FLY_APP_NAME}")
            image = data[0]['config']['image']

        # Machine configuration
        cmd = ["python3", "src/bot.py", "-u", room_url, "-t", token]
        worker_props = {
            "config": {
                "image": image,
                "auto_destroy": True,
                "init": {
                    "cmd": cmd
                },
                "restart": {
                    "policy": "no"
                },
                "guest": {
                    "cpu_kind": "shared",
                    "cpus": 1,
                    "memory_mb": 512
                }
            },
        }

        # Spawn a new machine instance
        async with session.post(machines_url, headers=FLY_HEADERS, json=worker_props) as r:
            if r.status != 200:
                text = await r.text()
                raise Exception(f"Problem starting a bot worker: {text}")
            data = await r.json()
            # Wait for the machine to enter the started state
            vm_id = data['id']

        async with session.get(f"{machines_url}/{vm_id}/wait?state=started", headers=FLY_HEADERS) as r:
            if r.status != 200:
                text = await r.text()
                raise Exception(f"Bot was unable to enter started state: {text}")

    print(f"Machine joined room: {room_url}")
# ------------ Main ------------ #
if __name__ == "__main__":
    # Refuse to start unless every credential the demo needs is present;
    # the first missing one (in this order) is reported.
    required_env_vars = ('OPENAI_API_KEY', 'DAILY_API_KEY',
                         'FAL_KEY', 'ELEVENLABS_VOICE_ID', 'ELEVENLABS_API_KEY')
    first_missing = next(
        (name for name in required_env_vars if name not in os.environ), None)
    if first_missing is not None:
        raise Exception(f"Missing environment variable: {first_missing}.")

    import uvicorn

    # Environment values act as defaults; command-line flags override them.
    default_host = os.getenv("HOST", "0.0.0.0")
    default_port = int(os.getenv("FAST_API_PORT", "7860"))

    parser = argparse.ArgumentParser(description="Daily Storyteller FastAPI server")
    parser.add_argument("--host", type=str, default=default_host, help="Host address")
    parser.add_argument("--port", type=int, default=default_port, help="Port number")
    parser.add_argument("--reload", action="store_true", help="Reload code on change")
    config = parser.parse_args()

    uvicorn.run("bot_runner:app", host=config.host, port=config.port, reload=config.reload)

View File

@@ -1,175 +0,0 @@
import os
import argparse
import subprocess
import atexit
from pathlib import Path
from typing import Optional
from fastapi import FastAPI, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse, JSONResponse
from utils.daily_helpers import create_room as _create_room, get_token, get_name_from_url
MAX_BOTS_PER_ROOM = 1
# Bot sub-process dict for status reporting and concurrency control.
# Maps pid -> (Popen handle, room URL), as stored by start_agent().
bot_procs = {}


def cleanup():
    """Terminate and reap every bot sub-process that was spawned."""
    # Clean up function, just to be extra safe.
    # Each value is a (proc, room_url) tuple, so unpack it before use; calling
    # terminate() on the tuple itself raised AttributeError.
    for proc, _room_url in bot_procs.values():
        proc.terminate()
        proc.wait()


atexit.register(cleanup)
# Application instance for the storyteller server.
app = FastAPI()
# NOTE(review): CORS is fully open here (any origin, method and header, with
# credentials allowed) — confirm this is intended outside of a demo.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Mount the static directory
# STATIC_DIR is relative to the process working directory.
STATIC_DIR = "frontend/out"
app.mount("/static", StaticFiles(directory=STATIC_DIR, html=True), name="static")
@app.post("/create")
async def create_room(request: Request) -> JSONResponse:
    """Return a room URL, room name and token for a story session.

    Uses the ``room_url`` from the JSON body when one is supplied, and
    creates a new room otherwise.
    """
    payload = await request.json()
    room_url = payload.get('room_url')

    if room_url is None:
        room_url, room_name = _create_room()
    else:
        room_name = get_name_from_url(room_url)

    token = get_token(room_url)
    body = {"room_url": room_url, "room_name": room_name, "token": token}
    return JSONResponse(body)
@app.post("/start")
async def start_agent(request: Request) -> JSONResponse:
    """Spawn a bot sub-process that joins the room named in the request.

    Args:
        request: Request whose JSON body carries ``room_url``. A body
            containing a ``"test"`` key is answered with ``{"test": true}``.

    Returns:
        JSON with the ``bot_id`` (process id) and the ``room_url``.

    Raises:
        HTTPException: 500 when ``room_url`` is missing, the room already has
            its maximum number of bots, no token could be obtained, or the
            sub-process could not be started.
    """
    data = await request.json()
    # Is this a webhook creation request?
    if "test" in data:
        return JSONResponse({"test": True})

    # Ensure the room property is present
    room_url = data.get('room_url')
    if not room_url:
        raise HTTPException(
            status_code=500,
            detail="Missing 'room' property in request data. Cannot start agent without a target room!")

    # Check if there is already an existing process running in this room
    num_bots_in_room = sum(
        1 for proc, url in bot_procs.values() if url == room_url and proc.poll() is None)
    if num_bots_in_room >= MAX_BOTS_PER_ROOM:
        raise HTTPException(
            status_code=500, detail=f"Max bot limited reach for room: {room_url}")

    # Get the token for the room
    token = get_token(room_url)
    if not token:
        raise HTTPException(
            status_code=500, detail=f"Failed to get token for room: {room_url}")

    # Spawn a new agent, and join the user session
    # Note: this is mostly for demonstration purposes (refer to 'deployment' in README)
    try:
        # room_url comes from the request body. Passing an argument list (no
        # shell) stops it from being interpreted as shell syntax.
        proc = subprocess.Popen(
            ["python3", "-m", "bot", "-u", room_url, "-t", token],
            bufsize=1,
            cwd=os.path.dirname(os.path.abspath(__file__))
        )
        bot_procs[proc.pid] = (proc, room_url)
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Failed to start subprocess: {e}") from e

    return JSONResponse({"bot_id": proc.pid, "room_url": room_url})
@app.get("/status/{pid}")
def get_status(pid: int):
    """Report whether the bot sub-process with the given pid is still running.

    Raises:
        HTTPException: 404 when no bot was registered under ``pid``.
    """
    entry = bot_procs.get(pid)

    # Unknown pid: nothing was ever registered under it.
    if not entry:
        raise HTTPException(
            status_code=404, detail=f"Bot with process id: {pid} not found")

    # poll() returns None while the process is still alive.
    status = "running" if entry[0].poll() is None else "finished"
    return JSONResponse({"bot_id": pid, "status": status})
@app.get("/{path_name:path}", response_class=FileResponse)
async def catch_all(path_name: Optional[str] = ""):
    """Serve the exported frontend for any path no other route handles.

    Args:
        path_name: The request path. Empty serves ``index.html``; otherwise
            the matching file, or the same path with an ``.html`` suffix.

    Returns:
        A FileResponse for the resolved file.

    Raises:
        HTTPException: 450 when nothing matches or the path points outside
            the static directory.
    """
    if path_name == "":
        return FileResponse(f"{STATIC_DIR}/index.html")

    static_root = Path(STATIC_DIR).resolve()
    file_path = (static_root / (path_name or "")).resolve()

    # path_name comes straight from the URL. An absolute path or ".." segments
    # would otherwise let a request read files outside the static directory.
    if static_root not in file_path.parents:
        raise HTTPException(status_code=450, detail="Incorrect API call")

    if file_path.is_file():
        return FileResponse(file_path)

    html_file_path = file_path.with_suffix(".html")
    if html_file_path.is_file():
        return FileResponse(html_file_path)

    raise HTTPException(status_code=450, detail="Incorrect API call")
if __name__ == "__main__":
    # Refuse to start unless every credential the demo needs is present;
    # the first missing one (in this order) is reported.
    required_env_vars = ('OPENAI_API_KEY', 'DAILY_API_KEY',
                         'FAL_KEY', 'ELEVENLABS_VOICE_ID', 'ELEVENLABS_API_KEY')
    first_missing = next(
        (name for name in required_env_vars if name not in os.environ), None)
    if first_missing is not None:
        raise Exception(f"Missing environment variable: {first_missing}.")

    import uvicorn

    # Environment values act as defaults; command-line flags override them.
    default_host = os.getenv("HOST", "0.0.0.0")
    default_port = int(os.getenv("FAST_API_PORT", "7860"))

    parser = argparse.ArgumentParser(description="Daily Storyteller FastAPI server")
    parser.add_argument("--host", type=str, default=default_host, help="Host address")
    parser.add_argument("--port", type=int, default=default_port, help="Port number")
    parser.add_argument("--reload", action="store_true", help="Reload code on change")
    config = parser.parse_args()

    uvicorn.run("server:app", host=config.host, port=config.port, reload=config.reload)

View File

@@ -1,109 +0,0 @@
import urllib.parse
import os
import time
import urllib
import requests
from dotenv import load_dotenv
# Read the .env file so the os.getenv() calls below can see its values.
load_dotenv()
# Host and path of the Daily REST API, without a scheme: callers prepend "https://".
daily_api_path = os.getenv("DAILY_API_URL") or "api.daily.co/v1"
# Sent as the Bearer credential on every request; None when not configured.
daily_api_key = os.getenv("DAILY_API_KEY")
def create_room(timeout: float = 10.0) -> tuple[str, str]:
    """
    Helper function to create a Daily room.
    # See: https://docs.daily.co/reference/rest-api/rooms

    Args:
        timeout (float): Seconds to wait for the Daily API before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        tuple: A tuple containing the room URL and room name.

    Raises:
        Exception: If the request to create the room fails or if the response does not contain the room URL or room name.
        requests.exceptions.Timeout: If the Daily API does not answer within ``timeout``.
    """
    room_props = {
        "exp": time.time() + 60 * 60,  # 1 hour
        "enable_chat": True,
        "enable_emoji_reactions": True,
        "eject_at_room_exp": True,
        "enable_prejoin_ui": False,  # Important for the bot to be able to join headlessly
    }
    res = requests.post(
        f"https://{daily_api_path}/rooms",
        headers={"Authorization": f"Bearer {daily_api_key}"},
        json={
            "properties": room_props
        },
        timeout=timeout,
    )
    if res.status_code != 200:
        raise Exception(f"Unable to create room: {res.text}")

    data = res.json()
    room_url: str = data.get("url")
    room_name: str = data.get("name")
    if room_url is None or room_name is None:
        raise Exception("Missing room URL or room name in response")

    return room_url, room_name
def get_name_from_url(room_url: str) -> str:
    """
    Extracts the name from a given room URL.

    Args:
        room_url (str): The URL of the room.

    Returns:
        str: The URL path without its leading or trailing slashes, so
        ``https://x.daily.co/room`` and ``https://x.daily.co/room/`` both
        yield ``room``.
    """
    # strip("/") instead of [1:] so a trailing slash does not end up in the name.
    return urllib.parse.urlparse(room_url).path.strip("/")
def get_token(room_url: str, timeout: float = 10.0) -> str:
    """
    Retrieves a meeting token for the specified Daily room URL.
    # See: https://docs.daily.co/reference/rest-api/meeting-tokens

    Args:
        room_url (str): The URL of the Daily room.
        timeout (float): Seconds to wait for the Daily API before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        str: The meeting token.

    Raises:
        Exception: If no room URL is specified or if no Daily API key is specified.
        Exception: If there is an error creating the meeting token.
        requests.exceptions.Timeout: If the Daily API does not answer within ``timeout``.
    """
    if not room_url:
        raise Exception(
            "No Daily room specified. You must specify a Daily room in order a token to be generated.")
    if not daily_api_key:
        raise Exception(
            "No Daily API key specified. set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")

    # The token expires one hour from now.
    expiration: float = time.time() + 60 * 60
    room_name = get_name_from_url(room_url)

    res: requests.Response = requests.post(
        f"https://{daily_api_path}/meeting-tokens",
        headers={
            "Authorization": f"Bearer {daily_api_key}"},
        json={
            "properties": {
                "room_name": room_name,
                "is_owner": True,  # Owner tokens required for transcription
                "exp": expiration}},
        timeout=timeout,
    )
    if res.status_code != 200:
        raise Exception(
            f"Failed to create meeting token: {res.status_code} {res.text}")

    token: str = res.json()["token"]
    return token

View File

@@ -1,5 +1,11 @@
import asyncio
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import asyncio
import os
import sys
@@ -12,7 +18,11 @@ from pipecat.processors.aggregators.sentence import SentenceAggregator
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.services.azure import AzureTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTranscriptionSettings, DailyTransport, DailyTransportMessageFrame
from pipecat.transports.services.daily import (
DailyParams,
DailyTranscriptionSettings,
DailyTransport,
DailyTransportMessageFrame)
from runner import configure
@@ -79,8 +89,10 @@ class TranslationSubtitles(FrameProcessor):
await self.push_frame(frame)
async def main(room_url: str, token):
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
@@ -133,5 +145,4 @@ async def main(room_url: str, token):
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))
asyncio.run(main())

View File

@@ -1,4 +1,3 @@
python-dotenv
requests
fastapi[all]
pipecat-ai[daily,openai,azure]

View File

@@ -1,11 +1,18 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
import time
import urllib
import requests
import aiohttp
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
def configure():
async def configure(aiohttp_session: aiohttp.ClientSession):
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
parser.add_argument(
"-u",
@@ -33,26 +40,16 @@ def configure():
if not key:
raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
# Create a meeting token for the given room with an expiration 1 hour in
# the future.
room_name: str = urllib.parse.urlparse(url).path[1:]
expiration: float = time.time() + 60 * 60
res: requests.Response = requests.post(
f"https://api.daily.co/v1/meeting-tokens",
headers={
"Authorization": f"Bearer {key}"},
json={
"properties": {
"room_name": room_name,
"is_owner": True,
"exp": expiration}},
daily_rest_helper = DailyRESTHelper(
daily_api_key=key,
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session
)
if res.status_code != 200:
raise Exception(
f"Failed to create meeting token: {res.status_code} {res.text}")
# Create a meeting token for the given room with an expiration 1 hour in
# the future.
expiry_time: float = 60 * 60
token: str = res.json()["token"]
token = await daily_rest_helper.get_token(url, expiry_time)
return (url, token)

View File

@@ -1,31 +1,51 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import os
import argparse
import subprocess
import atexit
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, RedirectResponse
from utils.daily_helpers import create_room as _create_room, get_token
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
MAX_BOTS_PER_ROOM = 1
# Bot sub-process dict for status reporting and concurrency control
bot_procs = {}
daily_helpers = {}
def cleanup():
# Clean up function, just to be extra safe
for proc in bot_procs.values():
for entry in bot_procs.values():
proc = entry[0]
proc.terminate()
proc.wait()
atexit.register(cleanup)
@asynccontextmanager
async def lifespan(app: FastAPI):
aiohttp_session = aiohttp.ClientSession()
daily_helpers["rest"] = DailyRESTHelper(
daily_api_key=os.getenv("DAILY_API_KEY", ""),
daily_api_url=os.getenv("DAILY_API_URL", 'https://api.daily.co/v1'),
aiohttp_session=aiohttp_session
)
yield
await aiohttp_session.close()
cleanup()
app = FastAPI()
app = FastAPI(lifespan=lifespan)
app.add_middleware(
CORSMiddleware,
@@ -39,45 +59,45 @@ app.add_middleware(
@app.get("/start")
async def start_agent(request: Request):
print(f"!!! Creating room")
room_url, room_name = _create_room()
print(f"!!! Room URL: {room_url}")
room = await daily_helpers["rest"].create_room(DailyRoomParams())
print(f"!!! Room URL: {room.url}")
# Ensure the room property is present
if not room_url:
if not room.url:
raise HTTPException(
status_code=500,
detail="Missing 'room' property in request data. Cannot start agent without a target room!")
# Check if there is already an existing process running in this room
num_bots_in_room = sum(
1 for proc in bot_procs.values() if proc[1] == room_url and proc[0].poll() is None)
1 for proc in bot_procs.values() if proc[1] == room.url and proc[0].poll() is None)
if num_bots_in_room >= MAX_BOTS_PER_ROOM:
raise HTTPException(
status_code=500, detail=f"Max bot limited reach for room: {room_url}")
status_code=500, detail=f"Max bot limited reach for room: {room.url}")
# Get the token for the room
token = get_token(room_url)
token = await daily_helpers["rest"].get_token(room.url)
if not token:
raise HTTPException(
status_code=500, detail=f"Failed to get token for room: {room_url}")
status_code=500, detail=f"Failed to get token for room: {room.url}")
# Spawn a new agent, and join the user session
# Note: this is mostly for demonstration purposes (refer to 'deployment' in README)
try:
proc = subprocess.Popen(
[
f"python3 -m bot -u {room_url} -t {token}"
f"python3 -m bot -u {room.url} -t {token}"
],
shell=True,
bufsize=1,
cwd=os.path.dirname(os.path.abspath(__file__))
)
bot_procs[proc.pid] = (proc, room_url)
bot_procs[proc.pid] = (proc, room.url)
except Exception as e:
raise HTTPException(
status_code=500, detail=f"Failed to start subprocess: {e}")
return RedirectResponse(room_url)
return RedirectResponse(room.url)
@app.get("/status/{pid}")

View File

@@ -1,109 +0,0 @@
import urllib.parse
import os
import time
import urllib
import requests
from dotenv import load_dotenv
# Read the .env file so the os.getenv() calls below can see its values.
load_dotenv()
# Host and path of the Daily REST API, without a scheme: callers prepend "https://".
daily_api_path = os.getenv("DAILY_API_URL") or "api.daily.co/v1"
# Sent as the Bearer credential on every request; None when not configured.
daily_api_key = os.getenv("DAILY_API_KEY")
def create_room(timeout: float = 10.0) -> tuple[str, str]:
    """
    Helper function to create a Daily room.
    # See: https://docs.daily.co/reference/rest-api/rooms

    Args:
        timeout (float): Seconds to wait for the Daily API before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        tuple: A tuple containing the room URL and room name.

    Raises:
        Exception: If the request to create the room fails or if the response does not contain the room URL or room name.
        requests.exceptions.Timeout: If the Daily API does not answer within ``timeout``.
    """
    room_props = {
        "exp": time.time() + 60 * 60,  # 1 hour
        "enable_chat": True,
        "enable_emoji_reactions": True,
        "eject_at_room_exp": True,
        "enable_prejoin_ui": False,  # Important for the bot to be able to join headlessly
    }
    res = requests.post(
        f"https://{daily_api_path}/rooms",
        headers={"Authorization": f"Bearer {daily_api_key}"},
        json={
            "properties": room_props
        },
        timeout=timeout,
    )
    if res.status_code != 200:
        raise Exception(f"Unable to create room: {res.text}")

    data = res.json()
    room_url: str = data.get("url")
    room_name: str = data.get("name")
    if room_url is None or room_name is None:
        raise Exception("Missing room URL or room name in response")

    return room_url, room_name
def get_name_from_url(room_url: str) -> str:
    """
    Extracts the name from a given room URL.

    Args:
        room_url (str): The URL of the room.

    Returns:
        str: The URL path without its leading or trailing slashes, so
        ``https://x.daily.co/room`` and ``https://x.daily.co/room/`` both
        yield ``room``.
    """
    # strip("/") instead of [1:] so a trailing slash does not end up in the name.
    return urllib.parse.urlparse(room_url).path.strip("/")
def get_token(room_url: str, timeout: float = 10.0) -> str:
    """
    Retrieves a meeting token for the specified Daily room URL.
    # See: https://docs.daily.co/reference/rest-api/meeting-tokens

    Args:
        room_url (str): The URL of the Daily room.
        timeout (float): Seconds to wait for the Daily API before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        str: The meeting token.

    Raises:
        Exception: If no room URL is specified or if no Daily API key is specified.
        Exception: If there is an error creating the meeting token.
        requests.exceptions.Timeout: If the Daily API does not answer within ``timeout``.
    """
    if not room_url:
        raise Exception(
            "No Daily room specified. You must specify a Daily room in order a token to be generated.")
    if not daily_api_key:
        raise Exception(
            "No Daily API key specified. set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")

    # The token expires one hour from now.
    expiration: float = time.time() + 60 * 60
    room_name = get_name_from_url(room_url)

    res: requests.Response = requests.post(
        f"https://{daily_api_path}/meeting-tokens",
        headers={
            "Authorization": f"Bearer {daily_api_key}"},
        json={
            "properties": {
                "room_name": room_name,
                "is_owner": True,  # Owner tokens required for transcription
                "exp": expiration}},
        timeout=timeout,
    )
    if res.status_code != 200:
        raise Exception(
            f"Failed to create meeting token: {res.status_code} {res.text}")

    token: str = res.json()["token"]
    return token

View File

@@ -46,10 +46,16 @@ This project is a FastAPI-based chatbot that integrates with Twilio to handle We
## Configure Twilio URLs
1. **Update the Twilio Webhook**:
1. **Start ngrok**:
In a new terminal, start ngrok to tunnel the local server:
```sh
ngrok http 8765
```
2. **Update the Twilio Webhook**:
Copy the ngrok URL and update your Twilio phone number webhook URL to `http://<ngrok_url>/start_call`.
2. **Update the streams.xml**:
3. **Update the streams.xml**:
Copy the ngrok URL and update templates/streams.xml with `wss://<ngrok_url>/ws`.
## Running the Application
@@ -61,11 +67,6 @@ This project is a FastAPI-based chatbot that integrates with Twilio to handle We
python server.py
```
2. **Start ngrok**:
In a new terminal, start ngrok to tunnel the local server:
```sh
ngrok http 8765
```
### Using Docker
1. **Build the Docker image**:

View File

@@ -15,6 +15,7 @@ from pipecat.services.deepgram import DeepgramSTTService
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketTransport, FastAPIWebsocketParams
from pipecat.vad.silero import SileroVADAnalyzer
from pipecat.serializers.twilio import TwilioFrameSerializer
from loguru import logger
@@ -25,7 +26,7 @@ logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def run_bot(websocket_client):
async def run_bot(websocket_client, stream_sid):
async with aiohttp.ClientSession() as session:
transport = FastAPIWebsocketTransport(
websocket=websocket_client,
@@ -34,7 +35,8 @@ async def run_bot(websocket_client):
add_wav_header=False,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
vad_audio_passthrough=True
vad_audio_passthrough=True,
serializer=TwilioFrameSerializer(stream_sid)
)
)

View File

@@ -1,3 +1,5 @@
import json
import uvicorn
from fastapi import FastAPI, WebSocket
@@ -26,8 +28,13 @@ async def start_call():
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
await websocket.accept()
start_data = websocket.iter_text()
await start_data.__anext__()
call_data = json.loads(await start_data.__anext__())
print(call_data, flush=True)
stream_sid = call_data['start']['streamSid']
print("WebSocket connection accepted")
await run_bot(websocket)
await run_bot(websocket, stream_sid)
if __name__ == "__main__":

View File

@@ -4,11 +4,10 @@
#
# pip-compile --all-extras pyproject.toml
#
aiofiles==23.2.1
aiofiles==24.1.0
# via deepgram-sdk
aiohttp==3.9.5
# via
# cartesia
# deepgram-sdk
# langchain
# langchain-community
@@ -17,7 +16,7 @@ aiosignal==1.3.1
# via aiohttp
annotated-types==0.7.0
# via pydantic
anthropic==0.25.9
anthropic==0.28.1
# via
# openpipe
# pipecat-ai (pyproject.toml)
@@ -36,23 +35,19 @@ attrs==23.2.0
# via
# aiohttp
# openpipe
av==12.1.0
av==12.3.0
# via faster-whisper
azure-cognitiveservices-speech==1.37.0
azure-cognitiveservices-speech==1.38.0
# via pipecat-ai (pyproject.toml)
blinker==1.8.2
# via flask
cachetools==5.3.3
cachetools==5.4.0
# via google-auth
cartesia==0.1.1
# via pipecat-ai (pyproject.toml)
certifi==2024.6.2
certifi==2024.7.4
# via
# httpcore
# httpx
# requests
cffi==1.16.0
# via sounddevice
charset-normalizer==3.3.2
# via requests
click==8.1.7
@@ -64,7 +59,7 @@ coloredlogs==15.0.1
# via onnxruntime
ctranslate2==4.3.1
# via faster-whisper
daily-python==0.10.0
daily-python==0.10.1
# via pipecat-ai (pyproject.toml)
dataclasses-json==0.6.7
# via
@@ -82,19 +77,17 @@ einops==0.8.0
# via pipecat-ai (pyproject.toml)
email-validator==2.2.0
# via fastapi
exceptiongroup==1.2.1
# via
# anyio
# pytest
fal-client==0.4.0
exceptiongroup==1.2.2
# via anyio
fal-client==0.4.1
# via pipecat-ai (pyproject.toml)
fastapi==0.111.0
fastapi==0.111.1
# via pipecat-ai (pyproject.toml)
fastapi-cli==0.0.4
fastapi-cli==0.0.5
# via fastapi
faster-whisper==1.0.2
faster-whisper==1.0.3
# via pipecat-ai (pyproject.toml)
filelock==3.15.3
filelock==3.15.4
# via
# huggingface-hub
# pyht
@@ -113,22 +106,22 @@ frozenlist==1.4.1
# via
# aiohttp
# aiosignal
fsspec==2024.6.0
fsspec==2024.6.1
# via
# huggingface-hub
# torch
future==1.0.0
# via pyloudnorm
google-ai-generativelanguage==0.6.4
google-ai-generativelanguage==0.6.6
# via google-generativeai
google-api-core[grpc]==2.19.0
google-api-core[grpc]==2.19.1
# via
# google-ai-generativelanguage
# google-api-python-client
# google-generativeai
google-api-python-client==2.134.0
google-api-python-client==2.140.0
# via google-generativeai
google-auth==2.30.0
google-auth==2.33.0
# via
# google-ai-generativelanguage
# google-api-core
@@ -137,20 +130,20 @@ google-auth==2.30.0
# google-generativeai
google-auth-httplib2==0.2.0
# via google-api-python-client
google-generativeai==0.5.4
google-generativeai==0.7.2
# via pipecat-ai (pyproject.toml)
googleapis-common-protos==1.63.1
googleapis-common-protos==1.63.2
# via
# google-api-core
# grpcio-status
greenlet==3.0.3
# via sqlalchemy
grpcio==1.64.1
grpcio==1.65.4
# via
# google-api-core
# grpcio-status
# pyht
grpcio-status==1.62.2
grpcio-status==1.62.3
# via google-api-core
h11==0.14.0
# via
@@ -167,7 +160,6 @@ httptools==0.6.1
httpx==0.27.0
# via
# anthropic
# cartesia
# deepgram-sdk
# fal-client
# fastapi
@@ -175,7 +167,7 @@ httpx==0.27.0
# openpipe
httpx-sse==0.4.0
# via fal-client
huggingface-hub==0.23.4
huggingface-hub==0.24.5
# via
# faster-whisper
# timm
@@ -190,8 +182,6 @@ idna==3.7
# httpx
# requests
# yarl
iniconfig==2.0.0
# via pytest
itsdangerous==2.2.0
# via flask
jinja2==3.1.4
@@ -199,31 +189,35 @@ jinja2==3.1.4
# fastapi
# flask
# torch
jiter==0.5.0
# via anthropic
jsonpatch==1.33
# via langchain-core
jsonpointer==3.0.0
# via jsonpatch
langchain==0.2.5
langchain==0.2.12
# via
# langchain-community
# pipecat-ai (pyproject.toml)
langchain-community==0.2.5
langchain-community==0.2.11
# via pipecat-ai (pyproject.toml)
langchain-core==0.2.9
langchain-core==0.2.29
# via
# langchain
# langchain-community
# langchain-openai
# langchain-text-splitters
langchain-openai==0.1.9
langchain-openai==0.1.20
# via pipecat-ai (pyproject.toml)
langchain-text-splitters==0.2.1
langchain-text-splitters==0.2.2
# via langchain
langsmith==0.1.81
langsmith==0.1.98
# via
# langchain
# langchain-community
# langchain-core
llvmlite==0.43.0
# via numba
loguru==0.7.2
# via pipecat-ai (pyproject.toml)
markdown-it-py==3.0.0
@@ -246,14 +240,18 @@ mypy-extensions==1.0.0
# via typing-inspect
networkx==3.3
# via torch
numba==0.60.0
# via resampy
numpy==1.26.4
# via
# ctranslate2
# langchain
# langchain-community
# numba
# onnxruntime
# pipecat-ai (pyproject.toml)
# pyloudnorm
# resampy
# scipy
# torchvision
# transformers
@@ -268,7 +266,7 @@ nvidia-cuda-nvrtc-cu12==12.1.105
# via torch
nvidia-cuda-runtime-cu12==12.1.105
# via torch
nvidia-cudnn-cu12==8.9.2.26
nvidia-cudnn-cu12==9.1.0.70
# via torch
nvidia-cufft-cu12==11.0.2.54
# via torch
@@ -282,44 +280,41 @@ nvidia-cusparse-cu12==12.1.0.106
# torch
nvidia-nccl-cu12==2.20.5
# via torch
nvidia-nvjitlink-cu12==12.5.40
nvidia-nvjitlink-cu12==12.6.20
# via
# nvidia-cusolver-cu12
# nvidia-cusparse-cu12
nvidia-nvtx-cu12==12.1.105
# via torch
onnxruntime==1.18.0
# via faster-whisper
openai==1.26.0
onnxruntime==1.18.1
# via
# faster-whisper
# silero-vad
openai==1.35.15
# via
# langchain-openai
# openpipe
# pipecat-ai (pyproject.toml)
openpipe==4.14.0
openpipe==4.18.0
# via pipecat-ai (pyproject.toml)
orjson==3.10.5
# via
# fastapi
# langsmith
orjson==3.10.7
# via langsmith
packaging==24.1
# via
# huggingface-hub
# langchain-core
# marshmallow
# onnxruntime
# pytest
# transformers
pillow==10.3.0
# via
# pipecat-ai (pyproject.toml)
# torchvision
pluggy==1.5.0
# via pytest
proto-plus==1.24.0
# via
# google-ai-generativelanguage
# google-api-core
protobuf==4.25.3
protobuf==4.25.4
# via
# google-ai-generativelanguage
# google-api-core
@@ -338,9 +333,7 @@ pyasn1-modules==0.4.0
# via google-auth
pyaudio==0.2.14
# via pipecat-ai (pyproject.toml)
pycparser==2.22
# via cffi
pydantic==2.7.4
pydantic==2.8.2
# via
# anthropic
# fastapi
@@ -349,7 +342,7 @@ pydantic==2.7.4
# langchain-core
# langsmith
# openai
pydantic-core==2.18.4
pydantic-core==2.20.1
# via pydantic
pygments==2.18.0
# via rich
@@ -359,10 +352,6 @@ pyloudnorm==0.1.1
# via pipecat-ai (pyproject.toml)
pyparsing==3.1.2
# via httplib2
pytest==8.2.2
# via pytest-asyncio
pytest-asyncio==0.23.7
# via cartesia
python-dateutil==2.9.0.post0
# via openpipe
python-dotenv==1.0.1
@@ -371,7 +360,7 @@ python-dotenv==1.0.1
# uvicorn
python-multipart==0.0.9
# via fastapi
pyyaml==6.0.1
pyyaml==6.0.2
# via
# ctranslate2
# huggingface-hub
@@ -381,13 +370,12 @@ pyyaml==6.0.1
# timm
# transformers
# uvicorn
regex==2024.5.15
regex==2024.7.24
# via
# tiktoken
# transformers
requests==2.32.3
# via
# cartesia
# google-api-core
# huggingface-hub
# langchain
@@ -396,18 +384,22 @@ requests==2.32.3
# pyht
# tiktoken
# transformers
resampy==0.4.3
# via pipecat-ai (pyproject.toml)
rich==13.7.1
# via typer
rsa==4.9
# via google-auth
safetensors==0.4.3
safetensors==0.4.4
# via
# timm
# transformers
scipy==1.13.1
scipy==1.14.0
# via pyloudnorm
shellingham==1.5.4
# via typer
silero-vad==5.1
# via pipecat-ai (pyproject.toml)
six==1.16.0
# via python-dateutil
sniffio==1.3.1
@@ -416,19 +408,17 @@ sniffio==1.3.1
# anyio
# httpx
# openai
sounddevice==0.4.7
# via pipecat-ai (pyproject.toml)
sqlalchemy==2.0.31
sqlalchemy==2.0.32
# via
# langchain
# langchain-community
starlette==0.37.2
# via fastapi
sympy==1.12.1
sympy==1.13.1
# via
# onnxruntime
# torch
tenacity==8.4.1
tenacity==8.5.0
# via
# langchain
# langchain-community
@@ -442,19 +432,17 @@ tokenizers==0.19.1
# anthropic
# faster-whisper
# transformers
tomli==2.0.1
# via pytest
torch==2.3.1
torch==2.4.0
# via
# pipecat-ai (pyproject.toml)
# silero-vad
# timm
# torchaudio
# torchvision
torchaudio==2.3.1
# via pipecat-ai (pyproject.toml)
torchvision==0.18.1
torchaudio==2.4.0
# via silero-vad
torchvision==0.19.0
# via timm
tqdm==4.66.4
tqdm==4.66.5
# via
# google-generativeai
# huggingface-hub
@@ -462,7 +450,7 @@ tqdm==4.66.4
# transformers
transformers==4.40.2
# via pipecat-ai (pyproject.toml)
triton==2.3.1
triton==3.0.0
# via torch
typer==0.12.3
# via fastapi-cli
@@ -474,6 +462,7 @@ typing-extensions==4.12.2
# fastapi
# google-generativeai
# huggingface-hub
# langchain-core
# openai
# pipecat-ai (pyproject.toml)
# pydantic
@@ -485,23 +474,22 @@ typing-extensions==4.12.2
# uvicorn
typing-inspect==0.9.0
# via dataclasses-json
ujson==5.10.0
# via fastapi
uritemplate==4.1.1
# via google-api-python-client
urllib3==2.2.2
# via requests
uvicorn[standard]==0.30.1
# via fastapi
uvicorn[standard]==0.30.5
# via
# fastapi
# fastapi-cli
uvloop==0.19.0
# via uvicorn
verboselogs==1.7
# via deepgram-sdk
watchfiles==0.22.0
watchfiles==0.23.0
# via uvicorn
websockets==12.0
# via
# cartesia
# deepgram-sdk
# pipecat-ai (pyproject.toml)
# uvicorn

View File

@@ -1,14 +1,13 @@
#
# This file is autogenerated by pip-compile with Python 3.12
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile --all-extras pyproject.toml
#
aiofiles==23.2.1
aiofiles==24.1.0
# via deepgram-sdk
aiohttp==3.9.5
# via
# cartesia
# deepgram-sdk
# langchain
# langchain-community
@@ -17,7 +16,7 @@ aiosignal==1.3.1
# via aiohttp
annotated-types==0.7.0
# via pydantic
anthropic==0.25.9
anthropic==0.28.1
# via
# openpipe
# pipecat-ai (pyproject.toml)
@@ -28,27 +27,27 @@ anyio==4.4.0
# openai
# starlette
# watchfiles
async-timeout==4.0.3
# via
# aiohttp
# langchain
attrs==23.2.0
# via
# aiohttp
# openpipe
av==12.1.0
av==12.3.0
# via faster-whisper
azure-cognitiveservices-speech==1.37.0
azure-cognitiveservices-speech==1.38.0
# via pipecat-ai (pyproject.toml)
blinker==1.8.2
# via flask
cachetools==5.3.3
cachetools==5.4.0
# via google-auth
cartesia==0.1.1
# via pipecat-ai (pyproject.toml)
certifi==2024.6.2
certifi==2024.7.4
# via
# httpcore
# httpx
# requests
cffi==1.16.0
# via sounddevice
charset-normalizer==3.3.2
# via requests
click==8.1.7
@@ -60,7 +59,7 @@ coloredlogs==15.0.1
# via onnxruntime
ctranslate2==4.3.1
# via faster-whisper
daily-python==0.10.0
daily-python==0.10.1
# via pipecat-ai (pyproject.toml)
dataclasses-json==0.6.7
# via
@@ -78,15 +77,17 @@ einops==0.8.0
# via pipecat-ai (pyproject.toml)
email-validator==2.2.0
# via fastapi
fal-client==0.4.0
exceptiongroup==1.2.2
# via anyio
fal-client==0.4.1
# via pipecat-ai (pyproject.toml)
fastapi==0.111.0
fastapi==0.111.1
# via pipecat-ai (pyproject.toml)
fastapi-cli==0.0.4
fastapi-cli==0.0.5
# via fastapi
faster-whisper==1.0.2
faster-whisper==1.0.3
# via pipecat-ai (pyproject.toml)
filelock==3.15.3
filelock==3.15.4
# via
# huggingface-hub
# pyht
@@ -104,22 +105,22 @@ frozenlist==1.4.1
# via
# aiohttp
# aiosignal
fsspec==2024.6.0
fsspec==2024.6.1
# via
# huggingface-hub
# torch
future==1.0.0
# via pyloudnorm
google-ai-generativelanguage==0.6.4
google-ai-generativelanguage==0.6.6
# via google-generativeai
google-api-core[grpc]==2.19.0
google-api-core[grpc]==2.19.1
# via
# google-ai-generativelanguage
# google-api-python-client
# google-generativeai
google-api-python-client==2.134.0
google-api-python-client==2.140.0
# via google-generativeai
google-auth==2.30.0
google-auth==2.33.0
# via
# google-ai-generativelanguage
# google-api-core
@@ -128,18 +129,18 @@ google-auth==2.30.0
# google-generativeai
google-auth-httplib2==0.2.0
# via google-api-python-client
google-generativeai==0.5.4
google-generativeai==0.7.2
# via pipecat-ai (pyproject.toml)
googleapis-common-protos==1.63.1
googleapis-common-protos==1.63.2
# via
# google-api-core
# grpcio-status
grpcio==1.64.1
grpcio==1.65.4
# via
# google-api-core
# grpcio-status
# pyht
grpcio-status==1.62.2
grpcio-status==1.62.3
# via google-api-core
h11==0.14.0
# via
@@ -156,7 +157,6 @@ httptools==0.6.1
httpx==0.27.0
# via
# anthropic
# cartesia
# deepgram-sdk
# fal-client
# fastapi
@@ -164,7 +164,7 @@ httpx==0.27.0
# openpipe
httpx-sse==0.4.0
# via fal-client
huggingface-hub==0.23.4
huggingface-hub==0.24.5
# via
# faster-whisper
# timm
@@ -179,8 +179,6 @@ idna==3.7
# httpx
# requests
# yarl
iniconfig==2.0.0
# via pytest
itsdangerous==2.2.0
# via flask
jinja2==3.1.4
@@ -188,31 +186,35 @@ jinja2==3.1.4
# fastapi
# flask
# torch
jiter==0.5.0
# via anthropic
jsonpatch==1.33
# via langchain-core
jsonpointer==3.0.0
# via jsonpatch
langchain==0.2.5
langchain==0.2.12
# via
# langchain-community
# pipecat-ai (pyproject.toml)
langchain-community==0.2.5
langchain-community==0.2.11
# via pipecat-ai (pyproject.toml)
langchain-core==0.2.9
langchain-core==0.2.29
# via
# langchain
# langchain-community
# langchain-openai
# langchain-text-splitters
langchain-openai==0.1.9
langchain-openai==0.1.20
# via pipecat-ai (pyproject.toml)
langchain-text-splitters==0.2.1
langchain-text-splitters==0.2.2
# via langchain
langsmith==0.1.81
langsmith==0.1.98
# via
# langchain
# langchain-community
# langchain-core
llvmlite==0.43.0
# via numba
loguru==0.7.2
# via pipecat-ai (pyproject.toml)
markdown-it-py==3.0.0
@@ -235,49 +237,50 @@ mypy-extensions==1.0.0
# via typing-inspect
networkx==3.3
# via torch
numba==0.60.0
# via resampy
numpy==1.26.4
# via
# ctranslate2
# langchain
# langchain-community
# numba
# onnxruntime
# pipecat-ai (pyproject.toml)
# pyloudnorm
# resampy
# scipy
# torchvision
# transformers
onnxruntime==1.18.0
# via faster-whisper
openai==1.26.0
onnxruntime==1.18.1
# via
# faster-whisper
# silero-vad
openai==1.35.15
# via
# langchain-openai
# openpipe
# pipecat-ai (pyproject.toml)
openpipe==4.14.0
openpipe==4.18.0
# via pipecat-ai (pyproject.toml)
orjson==3.10.5
# via
# fastapi
# langsmith
orjson==3.10.7
# via langsmith
packaging==24.1
# via
# huggingface-hub
# langchain-core
# marshmallow
# onnxruntime
# pytest
# transformers
pillow==10.3.0
# via
# pipecat-ai (pyproject.toml)
# torchvision
pluggy==1.5.0
# via pytest
proto-plus==1.24.0
# via
# google-ai-generativelanguage
# google-api-core
protobuf==4.25.3
protobuf==4.25.4
# via
# google-ai-generativelanguage
# google-api-core
@@ -296,9 +299,7 @@ pyasn1-modules==0.4.0
# via google-auth
pyaudio==0.2.14
# via pipecat-ai (pyproject.toml)
pycparser==2.22
# via cffi
pydantic==2.7.4
pydantic==2.8.2
# via
# anthropic
# fastapi
@@ -307,7 +308,7 @@ pydantic==2.7.4
# langchain-core
# langsmith
# openai
pydantic-core==2.18.4
pydantic-core==2.20.1
# via pydantic
pygments==2.18.0
# via rich
@@ -317,10 +318,6 @@ pyloudnorm==0.1.1
# via pipecat-ai (pyproject.toml)
pyparsing==3.1.2
# via httplib2
pytest==8.2.2
# via pytest-asyncio
pytest-asyncio==0.23.7
# via cartesia
python-dateutil==2.9.0.post0
# via openpipe
python-dotenv==1.0.1
@@ -329,7 +326,7 @@ python-dotenv==1.0.1
# uvicorn
python-multipart==0.0.9
# via fastapi
pyyaml==6.0.1
pyyaml==6.0.2
# via
# ctranslate2
# huggingface-hub
@@ -339,13 +336,12 @@ pyyaml==6.0.1
# timm
# transformers
# uvicorn
regex==2024.5.15
regex==2024.7.24
# via
# tiktoken
# transformers
requests==2.32.3
# via
# cartesia
# google-api-core
# huggingface-hub
# langchain
@@ -354,18 +350,22 @@ requests==2.32.3
# pyht
# tiktoken
# transformers
resampy==0.4.3
# via pipecat-ai (pyproject.toml)
rich==13.7.1
# via typer
rsa==4.9
# via google-auth
safetensors==0.4.3
safetensors==0.4.4
# via
# timm
# transformers
scipy==1.13.1
scipy==1.14.0
# via pyloudnorm
shellingham==1.5.4
# via typer
silero-vad==5.1
# via pipecat-ai (pyproject.toml)
six==1.16.0
# via python-dateutil
sniffio==1.3.1
@@ -374,19 +374,17 @@ sniffio==1.3.1
# anyio
# httpx
# openai
sounddevice==0.4.7
# via pipecat-ai (pyproject.toml)
sqlalchemy==2.0.31
sqlalchemy==2.0.32
# via
# langchain
# langchain-community
starlette==0.37.2
# via fastapi
sympy==1.12.1
sympy==1.13.1
# via
# onnxruntime
# torch
tenacity==8.4.1
tenacity==8.5.0
# via
# langchain
# langchain-community
@@ -400,17 +398,17 @@ tokenizers==0.19.1
# anthropic
# faster-whisper
# transformers
torch==2.3.1
torch==2.4.0
# via
# pipecat-ai (pyproject.toml)
# silero-vad
# timm
# torchaudio
# torchvision
torchaudio==2.3.1
# via pipecat-ai (pyproject.toml)
torchvision==0.18.1
torchaudio==2.4.0
# via silero-vad
torchvision==0.19.0
# via timm
tqdm==4.66.4
tqdm==4.66.5
# via
# google-generativeai
# huggingface-hub
@@ -423,10 +421,12 @@ typer==0.12.3
typing-extensions==4.12.2
# via
# anthropic
# anyio
# deepgram-sdk
# fastapi
# google-generativeai
# huggingface-hub
# langchain-core
# openai
# pipecat-ai (pyproject.toml)
# pydantic
@@ -435,25 +435,25 @@ typing-extensions==4.12.2
# torch
# typer
# typing-inspect
# uvicorn
typing-inspect==0.9.0
# via dataclasses-json
ujson==5.10.0
# via fastapi
uritemplate==4.1.1
# via google-api-python-client
urllib3==2.2.2
# via requests
uvicorn[standard]==0.30.1
# via fastapi
uvicorn[standard]==0.30.5
# via
# fastapi
# fastapi-cli
uvloop==0.19.0
# via uvicorn
verboselogs==1.7
# via deepgram-sdk
watchfiles==0.22.0
watchfiles==0.23.0
# via uvicorn
websockets==12.0
# via
# cartesia
# deepgram-sdk
# pipecat-ai (pyproject.toml)
# uvicorn

View File

@@ -8,7 +8,7 @@ dynamic = ["version"]
description = "An open source framework for voice (and multimodal) assistants"
license = { text = "BSD 2-Clause License" }
readme = "README.md"
requires-python = ">=3.7"
requires-python = ">=3.10"
keywords = ["webrtc", "audio", "video", "ai"]
classifiers = [
"Development Status :: 5 - Production/Stable",
@@ -34,24 +34,26 @@ Source = "https://github.com/pipecat-ai/pipecat"
Website = "https://pipecat.ai"
[project.optional-dependencies]
anthropic = [ "anthropic~=0.25.7" ]
azure = [ "azure-cognitiveservices-speech~=1.37.0" ]
cartesia = [ "numpy~=1.26.0", "sounddevice", "cartesia" ]
daily = [ "daily-python~=0.10.0" ]
anthropic = [ "anthropic~=0.28.1" ]
azure = [ "azure-cognitiveservices-speech~=1.38.0" ]
cartesia = [ "websockets~=12.0" ]
daily = [ "daily-python~=0.10.1" ]
deepgram = [ "deepgram-sdk~=3.2.7" ]
examples = [ "python-dotenv~=1.0.0", "flask~=3.0.3", "flask_cors~=4.0.1" ]
fal = [ "fal-client~=0.4.0" ]
google = [ "google-generativeai~=0.5.3" ]
fireworks = [ "openai~=1.26.0" ]
langchain = [ "langchain~=0.2.1", "langchain-community~=0.2.1", "langchain-openai~=0.1.8" ]
fal = [ "fal-client~=0.4.1" ]
gladia = [ "websockets~=12.0" ]
google = [ "google-generativeai~=0.7.1" ]
fireworks = [ "openai~=1.35.0" ]
langchain = [ "langchain~=0.2.10", "langchain-community~=0.2.9", "langchain-openai~=0.1.17" ]
local = [ "pyaudio~=0.2.0" ]
moondream = [ "einops~=0.8.0", "timm~=0.9.16", "transformers~=4.40.2" ]
openai = [ "openai~=1.26.0" ]
openpipe = [ "openpipe~=4.14.0" ]
openai = [ "openai~=1.35.0" ]
openpipe = [ "openpipe~=4.18.0" ]
playht = [ "pyht~=0.0.28" ]
silero = [ "torch~=2.3.0", "torchaudio~=2.3.0" ]
silero = [ "silero-vad~=5.1" ]
websocket = [ "websockets~=12.0", "fastapi~=0.111.0" ]
whisper = [ "faster-whisper~=1.0.2" ]
whisper = [ "faster-whisper~=1.0.3" ]
xtts = [ "resampy~=0.4.3" ]
[tool.setuptools.packages.find]
# All the following settings are optional:
@@ -62,3 +64,4 @@ pythonpath = ["src"]
[tool.setuptools_scm]
local_scheme = "no-local-version"
fallback_version = "0.0.0-dev"

View File

@@ -101,7 +101,7 @@ class UserImageRawFrame(ImageRawFrame):
class SpriteFrame(Frame):
"""An animated sprite. Will be shown by the transport if the transport's
camera is enabled. Will play at the framerate specified in the transport's
`fps` constructor parameter.
`camera_out_framerate` constructor parameter.
"""
images: List[ImageRawFrame]
@@ -158,6 +158,34 @@ class LLMMessagesFrame(DataFrame):
messages: List[dict]
@dataclass
class LLMMessagesAppendFrame(DataFrame):
"""A frame containing a list of LLM messages that need to be added to the
current context.
"""
messages: List[dict]
@dataclass
class LLMMessagesUpdateFrame(DataFrame):
"""A frame containing a list of new LLM messages. These messages will
replace the current context LLM messages and should generate a new
LLMMessagesFrame.
"""
messages: List[dict]
@dataclass
class TTSSpeakFrame(DataFrame):
"""A frame that contains a text that should be spoken by the TTS in the
pipeline (if any).
"""
text: str
@dataclass
class TransportMessageFrame(DataFrame):
message: Any
@@ -184,14 +212,6 @@ class SystemFrame(Frame):
pass
@dataclass
class StartFrame(SystemFrame):
"""This is the first frame that should be pushed down a pipeline."""
allow_interruptions: bool = False
enable_metrics: bool = False
report_only_initial_ttfb: bool = False
@dataclass
class CancelFrame(SystemFrame):
"""Indicates that a pipeline needs to stop right away."""
@@ -240,12 +260,24 @@ class StopInterruptionFrame(SystemFrame):
pass
@dataclass
class BotInterruptionFrame(SystemFrame):
"""Emitted when the bot should be interrupted. This will mainly cause the
same actions as if the user interrupted except that the
UserStartedSpeakingFrame and UserStoppedSpeakingFrame won't be generated.
"""
pass
@dataclass
class MetricsFrame(SystemFrame):
"""Emitted by processor that can compute metrics like latencies.
"""
ttfb: Mapping[str, float]
ttfb: List[Mapping[str, Any]] | None = None
processing: List[Mapping[str, Any]] | None = None
tokens: List[Mapping[str, Any]] | None = None
characters: List[Mapping[str, Any]] | None = None
#
# Control frames
@@ -257,6 +289,15 @@ class ControlFrame(Frame):
pass
@dataclass
class StartFrame(ControlFrame):
"""This is the first frame that should be pushed down a pipeline."""
allow_interruptions: bool = False
enable_metrics: bool = False
enable_usage_metrics: bool = False
report_only_initial_ttfb: bool = False
@dataclass
class EndFrame(ControlFrame):
"""Indicates that a pipeline has ended and frame processors and pipelines
@@ -271,27 +312,13 @@ class EndFrame(ControlFrame):
@dataclass
class LLMFullResponseStartFrame(ControlFrame):
"""Used to indicate the beginning of a full LLM response. Following
LLMResponseStartFrame, TextFrame and LLMResponseEndFrame for each sentence
until a LLMFullResponseEndFrame."""
"""Used to indicate the beginning of an LLM response. Following by one or
more TextFrame and a final LLMFullResponseEndFrame."""
pass
@dataclass
class LLMFullResponseEndFrame(ControlFrame):
"""Indicates the end of a full LLM response."""
pass
@dataclass
class LLMResponseStartFrame(ControlFrame):
"""Used to indicate the beginning of an LLM response. Following TextFrames
are part of the LLM response until an LLMResponseEndFrame"""
pass
@dataclass
class LLMResponseEndFrame(ControlFrame):
"""Indicates the end of an LLM response."""
pass
@@ -313,6 +340,33 @@ class UserStoppedSpeakingFrame(ControlFrame):
pass
@dataclass
class BotStartedSpeakingFrame(ControlFrame):
"""Emitted upstream by transport outputs to indicate the bot started speaking.
"""
pass
@dataclass
class BotStoppedSpeakingFrame(ControlFrame):
"""Emitted upstream by transport outputs to indicate the bot stopped speaking.
"""
pass
@dataclass
class BotSpeakingFrame(ControlFrame):
"""Emitted upstream by transport outputs while the bot is still
speaking. This can be used, for example, to detect when a user is idle. That
is, while the bot is speaking we don't want to trigger any user idle timeout
since the user might be listening.
"""
pass
@dataclass
class TTSStartedFrame(ControlFrame):
"""Used to indicate the beginning of a TTS response. Following
@@ -338,3 +392,17 @@ class UserImageRequestFrame(ControlFrame):
def __str__(self):
return f"{self.name}, user: {self.user_id}"
@dataclass
class LLMModelUpdateFrame(ControlFrame):
"""A control frame containing a request to update to a new LLM model.
"""
model: str
@dataclass
class TTSVoiceUpdateFrame(ControlFrame):
"""A control frame containing a request to update to a new TTS voice.
"""
voice: str

View File

@@ -64,7 +64,7 @@ class Pipeline(BasePipeline):
services = []
for p in self._processors:
if isinstance(p, BasePipeline):
services += p.processors_with_metrics()
services.extend(p.processors_with_metrics())
elif p.can_generate_metrics():
services.append(p)
return services
@@ -91,5 +91,7 @@ class Pipeline(BasePipeline):
def _link_processors(self):
# Walk the processor list in order: each processor gets this pipeline set
# as its parent and is linked to the processor that follows it.
prev = self._processors[0]
for curr in self._processors[1:]:
prev.set_parent(self)
prev.link(curr)
prev = curr
# NOTE(review): indentation is lost in this rendering; presumably this call
# sits after the loop so the last processor also gets its parent - confirm.
prev.set_parent(self)

View File

@@ -15,7 +15,7 @@ from loguru import logger
class PipelineRunner:
def __init__(self, name: str | None = None, handle_sigint: bool = True):
def __init__(self, *, name: str | None = None, handle_sigint: bool = True):
self.id: int = obj_id()
self.name: str = name or f"{self.__class__.__name__}#{obj_count(self)}"

View File

@@ -21,6 +21,8 @@ from loguru import logger
class PipelineParams(BaseModel):
allow_interruptions: bool = False
enable_metrics: bool = False
enable_usage_metrics: bool = False
send_initial_empty_metrics: bool = True
report_only_initial_ttfb: bool = False
@@ -95,17 +97,21 @@ class PipelineTask:
def _initial_metrics_frame(self) -> MetricsFrame:
processors = self._pipeline.processors_with_metrics()
ttfb = dict(zip([p.name for p in processors], [0] * len(processors)))
return MetricsFrame(ttfb=ttfb)
ttfb = [{"processor": p.name, "value": 0.0} for p in processors]
processing = [{"processor": p.name, "value": 0.0} for p in processors]
return MetricsFrame(ttfb=ttfb, processing=processing)
async def _process_down_queue(self):
    # Forward every pipeline parameter to the StartFrame field of the same
    # name. Usage metrics have their own `enable_usage_metrics` switch in
    # PipelineParams, so it must not be derived from `enable_metrics`;
    # otherwise the dedicated switch is silently ignored.
    start_frame = StartFrame(
        allow_interruptions=self._params.allow_interruptions,
        enable_metrics=self._params.enable_metrics,
        enable_usage_metrics=self._params.enable_usage_metrics,
        report_only_initial_ttfb=self._params.report_only_initial_ttfb
    )
await self._source.process_frame(start_frame, FrameDirection.DOWNSTREAM)
await self._source.process_frame(self._initial_metrics_frame(), FrameDirection.DOWNSTREAM)
if self._params.send_initial_empty_metrics:
await self._source.process_frame(self._initial_metrics_frame(), FrameDirection.DOWNSTREAM)
running = True
should_cleanup = True

View File

@@ -14,9 +14,9 @@ from pipecat.frames.frames import (
InterimTranscriptionFrame,
LLMFullResponseEndFrame,
LLMFullResponseStartFrame,
LLMResponseEndFrame,
LLMResponseStartFrame,
LLMMessagesAppendFrame,
LLMMessagesFrame,
LLMMessagesUpdateFrame,
StartInterruptionFrame,
TranscriptionFrame,
TextFrame,
@@ -122,6 +122,19 @@ class LLMResponseAggregator(FrameProcessor):
# Reset anyways
self._reset()
await self.push_frame(frame, direction)
elif isinstance(frame, LLMMessagesAppendFrame):
self._messages.extend(frame.messages)
messages_frame = LLMMessagesFrame(self._messages)
await self.push_frame(messages_frame)
elif isinstance(frame, LLMMessagesUpdateFrame):
# We push the frame downstream so the assistant aggregator gets
# updated as well.
await self.push_frame(frame)
# We can now reset this one.
self._reset()
self._messages = frame.messages
messages_frame = LLMMessagesFrame(self._messages)
await self.push_frame(messages_frame)
else:
await self.push_frame(frame, direction)
@@ -173,7 +186,7 @@ class LLMUserResponseAggregator(LLMResponseAggregator):
class LLMFullResponseAggregator(FrameProcessor):
"""This class aggregates Text frames until it receives a
LLMResponseEndFrame, then emits the concatenated text as
LLMFullResponseEndFrame, then emits the concatenated text as
a single text frame.
given the following frames:
@@ -182,12 +195,12 @@ class LLMFullResponseAggregator(FrameProcessor):
TextFrame(" world.")
TextFrame(" I am")
TextFrame(" an LLM.")
LLMResponseEndFrame()]
LLMFullResponseEndFrame()]
this processor will yield nothing for the first 4 frames, then
TextFrame("Hello, world. I am an LLM.")
LLMResponseEndFrame()
LLMFullResponseEndFrame()
when passed the last frame.
@@ -203,9 +216,9 @@ class LLMFullResponseAggregator(FrameProcessor):
>>> asyncio.run(print_frames(aggregator, TextFrame(" world.")))
>>> asyncio.run(print_frames(aggregator, TextFrame(" I am")))
>>> asyncio.run(print_frames(aggregator, TextFrame(" an LLM.")))
>>> asyncio.run(print_frames(aggregator, LLMResponseEndFrame()))
>>> asyncio.run(print_frames(aggregator, LLMFullResponseEndFrame()))
Hello, world. I am an LLM.
LLMResponseEndFrame
LLMFullResponseEndFrame
"""
def __init__(self):
@@ -234,6 +247,11 @@ class LLMContextAggregator(LLMResponseAggregator):
async def _push_aggregation(self):
if len(self._aggregation) > 0:
self._context.add_message({"role": self._role, "content": self._aggregation})
# Reset the aggregation. Reset it before pushing it down, otherwise
# if the tasks gets cancelled we won't be able to clear things up.
self._aggregation = ""
frame = OpenAILLMContextFrame(self._context)
await self.push_frame(frame)
@@ -247,9 +265,10 @@ class LLMAssistantContextAggregator(LLMContextAggregator):
messages=[],
context=context,
role="assistant",
start_frame=LLMResponseStartFrame,
end_frame=LLMResponseEndFrame,
accumulator_frame=TextFrame
start_frame=LLMFullResponseStartFrame,
end_frame=LLMFullResponseEndFrame,
accumulator_frame=TextFrame,
handle_interruptions=True
)

View File

@@ -0,0 +1,64 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
from pipecat.frames.frames import EndFrame, Frame, StartInterruptionFrame
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
class AsyncFrameProcessor(FrameProcessor):
"""A frame processor that pushes queued frames from a separate task.

Frames handed to `queue_frame()` are stored in an internal queue and pushed
by a task in the order they were queued. When a `StartInterruptionFrame` is
processed, that task is cancelled, the interruption frame is pushed
directly, and a new empty queue and task are created.
"""
def __init__(
self,
*,
name: str | None = None,
loop: asyncio.AbstractEventLoop | None = None,
**kwargs):
super().__init__(name=name, loop=loop, **kwargs)
# Create the queue and the task that pushes frames from it.
self._create_push_task()
async def process_frame(self, frame: Frame, direction: FrameDirection):
"""Run the base class processing, then handle interruption frames."""
await super().process_frame(frame, direction)
if isinstance(frame, StartInterruptionFrame):
await self._handle_interruptions(frame)
async def queue_frame(
self,
frame: Frame,
direction: FrameDirection = FrameDirection.DOWNSTREAM):
"""Queue `frame` so the push task pushes it in the given direction."""
await self._push_queue.put((frame, direction))
async def cleanup(self):
"""Cancel the push task and wait for it."""
self._push_frame_task.cancel()
await self._push_frame_task
async def _handle_interruptions(self, frame: Frame):
"""Restart the push task around an interruption frame.

The statement order matters: the task is cancelled and awaited first,
then `frame` is pushed, and only then are the new queue and task created.
"""
# Cancel the task. This will stop pushing frames downstream.
self._push_frame_task.cancel()
await self._push_frame_task
# Push an out-of-band frame (i.e. not using the ordered push
# frame task).
await self.push_frame(frame)
# Create a new queue and task.
self._create_push_task()
def _create_push_task(self):
"""Replace the queue with a new one and start a new push task."""
self._push_queue = asyncio.Queue()
self._push_frame_task = self.get_event_loop().create_task(self._push_frame_task_handler())
async def _push_frame_task_handler(self):
"""Push queued frames one at a time until an EndFrame or cancellation."""
running = True
while running:
try:
(frame, direction) = await self._push_queue.get()
await self.push_frame(frame, direction)
# The loop stops after an EndFrame has been pushed.
running = not isinstance(frame, EndFrame)
self._push_queue.task_done()
except asyncio.CancelledError:
# Cancellation is caught here and ends the loop.
break

View File

@@ -82,5 +82,5 @@ class WakeCheckFilter(FrameProcessor):
await self.push_frame(frame, direction)
except Exception as e:
error_msg = f"Error in wake word filter: {e}"
logger.error(error_msg)
logger.exception(error_msg)
await self.push_error(ErrorFrame(error_msg))

View File

@@ -9,7 +9,7 @@ import time
from enum import Enum
from pipecat.frames.frames import ErrorFrame, Frame, MetricsFrame, StartFrame, UserStoppedSpeakingFrame
from pipecat.frames.frames import ErrorFrame, Frame, MetricsFrame, StartFrame, StartInterruptionFrame, UserStoppedSpeakingFrame
from pipecat.utils.utils import obj_count, obj_id
from loguru import logger
@@ -20,15 +20,72 @@ class FrameDirection(Enum):
UPSTREAM = 2
class FrameProcessorMetrics:
"""Holds timing state for one processor and builds its MetricsFrames.

Each entry placed in a MetricsFrame is a dict with the processor name under
"processor" and the measured value under "value", except for LLM usage
where the caller-provided `tokens` dict is passed through as is.
"""
def __init__(self, name: str):
self._name = name
# A start time of 0 means "no measurement in progress".
self._start_ttfb_time = 0
self._start_processing_time = 0
self._should_report_ttfb = True
async def start_ttfb_metrics(self, report_only_initial_ttfb):
"""Record the TTFB start time if TTFB reporting is currently armed."""
if self._should_report_ttfb:
self._start_ttfb_time = time.time()
# NOTE(review): indentation is lost in this rendering; presumably this
# assignment is inside the `if` above - confirm.
self._should_report_ttfb = not report_only_initial_ttfb
async def stop_ttfb_metrics(self):
"""Return a MetricsFrame with the elapsed TTFB, or None if not started."""
if self._start_ttfb_time == 0:
return None
value = time.time() - self._start_ttfb_time
logger.debug(f"{self._name} TTFB: {value}")
ttfb = {
"processor": self._name,
"value": value
}
# Reset so a second stop without a new start returns None.
self._start_ttfb_time = 0
return MetricsFrame(ttfb=[ttfb])
async def start_processing_metrics(self):
"""Record the processing start time."""
self._start_processing_time = time.time()
async def stop_processing_metrics(self):
"""Return a MetricsFrame with the elapsed processing time, or None if
not started."""
if self._start_processing_time == 0:
return None
value = time.time() - self._start_processing_time
logger.debug(f"{self._name} processing time: {value}")
processing = {
"processor": self._name,
"value": value
}
# Reset so a second stop without a new start returns None.
self._start_processing_time = 0
return MetricsFrame(processing=[processing])
async def start_llm_usage_metrics(self, tokens: dict):
"""Log token usage and return it wrapped in a MetricsFrame.

`tokens` must contain the "prompt_tokens" and "completion_tokens" keys,
which are read for the log message.
"""
logger.debug(
f"{self._name} prompt tokens: {tokens['prompt_tokens']}, completion tokens: {tokens['completion_tokens']}")
return MetricsFrame(tokens=[tokens])
async def start_tts_usage_metrics(self, text: str):
"""Return a MetricsFrame reporting the number of characters in `text`."""
characters = {
"processor": self._name,
"value": len(text),
}
logger.debug(f"{self._name} usage characters: {characters['value']}")
return MetricsFrame(characters=[characters])
class FrameProcessor:
def __init__(
self,
*,
name: str | None = None,
loop: asyncio.AbstractEventLoop | None = None,
**kwargs):
self.id: int = obj_id()
self.name = name or f"{self.__class__.__name__}#{obj_count(self)}"
self._parent: "FrameProcessor" | None = None
self._prev: "FrameProcessor" | None = None
self._next: "FrameProcessor" | None = None
self._loop: asyncio.AbstractEventLoop = loop or asyncio.get_running_loop()
@@ -36,11 +93,11 @@ class FrameProcessor:
# Properties
self._allow_interruptions = False
self._enable_metrics = False
self._enable_usage_metrics = False
self._report_only_initial_ttfb = False
# Metrics
self._start_ttfb_time = 0
self._should_report_ttfb = True
self._metrics = FrameProcessorMetrics(name=self.name)
@property
def interruptions_allowed(self):
@@ -50,6 +107,10 @@ class FrameProcessor:
def metrics_enabled(self):
return self._enable_metrics
@property
def usage_metrics_enabled(self):
return self._enable_usage_metrics
@property
def report_only_initial_ttfb(self):
return self._report_only_initial_ttfb
@@ -58,21 +119,45 @@ class FrameProcessor:
return False
async def start_ttfb_metrics(self):
if self.metrics_enabled and self._should_report_ttfb:
self._start_ttfb_time = time.time()
self._should_report_ttfb = not self._report_only_initial_ttfb
if self.can_generate_metrics() and self.metrics_enabled:
await self._metrics.start_ttfb_metrics(self._report_only_initial_ttfb)
async def stop_ttfb_metrics(self):
if self.metrics_enabled and self._start_ttfb_time > 0:
ttfb = time.time() - self._start_ttfb_time
logger.debug(f"{self.name} TTFB: {ttfb}")
await self.push_frame(MetricsFrame(ttfb={self.name: ttfb}))
self._start_ttfb_time = 0
if self.can_generate_metrics() and self.metrics_enabled:
frame = await self._metrics.stop_ttfb_metrics()
if frame:
await self.push_frame(frame)
async def start_processing_metrics(self):
if self.can_generate_metrics() and self.metrics_enabled:
await self._metrics.start_processing_metrics()
async def stop_processing_metrics(self):
if self.can_generate_metrics() and self.metrics_enabled:
frame = await self._metrics.stop_processing_metrics()
if frame:
await self.push_frame(frame)
async def start_llm_usage_metrics(self, tokens: dict):
if self.can_generate_metrics() and self.usage_metrics_enabled:
frame = await self._metrics.start_llm_usage_metrics(tokens)
if frame:
await self.push_frame(frame)
async def start_tts_usage_metrics(self, text: str):
if self.can_generate_metrics() and self.usage_metrics_enabled:
frame = await self._metrics.start_tts_usage_metrics(text)
if frame:
await self.push_frame(frame)
async def stop_all_metrics(self):
await self.stop_ttfb_metrics()
await self.stop_processing_metrics()
async def cleanup(self):
pass
def link(self, processor: 'FrameProcessor'):
def link(self, processor: "FrameProcessor"):
self._next = processor
processor._prev = self
logger.debug(f"Linking {self} -> {self._next}")
@@ -80,11 +165,20 @@ class FrameProcessor:
def get_event_loop(self) -> asyncio.AbstractEventLoop:
return self._loop
def set_parent(self, parent: "FrameProcessor"):
self._parent = parent
def get_parent(self) -> "FrameProcessor":
return self._parent
async def process_frame(self, frame: Frame, direction: FrameDirection):
    """Update this processor's own state from the given frame.

    - StartFrame: copies the interruption and metrics settings carried by
      the frame.
    - StartInterruptionFrame: stops any metrics currently being measured.
    - UserStoppedSpeakingFrame: re-arms TTFB reporting.

    This method does not push the frame itself.
    """
    if isinstance(frame, StartFrame):
        self._allow_interruptions = frame.allow_interruptions
        self._enable_metrics = frame.enable_metrics
        self._enable_usage_metrics = frame.enable_usage_metrics
        self._report_only_initial_ttfb = frame.report_only_initial_ttfb
    elif isinstance(frame, StartInterruptionFrame):
        await self.stop_all_metrics()
    elif isinstance(frame, UserStoppedSpeakingFrame):
        # The "should report TTFB" flag is owned by the metrics helper, which
        # is what start_ttfb_metrics() consults. Setting an attribute of the
        # same name on the processor would never be read, so TTFB would not
        # be reported again when report_only_initial_ttfb is enabled.
        self._metrics._should_report_ttfb = True
@@ -92,12 +186,15 @@ class FrameProcessor:
await self.push_frame(error, FrameDirection.UPSTREAM)
async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM):
if direction == FrameDirection.DOWNSTREAM and self._next:
logger.trace(f"Pushing {frame} from {self} to {self._next}")
await self._next.process_frame(frame, direction)
elif direction == FrameDirection.UPSTREAM and self._prev:
logger.trace(f"Pushing {frame} upstream from {self} to {self._prev}")
await self._prev.process_frame(frame, direction)
try:
if direction == FrameDirection.DOWNSTREAM and self._next:
logger.trace(f"Pushing {frame} from {self} to {self._next}")
await self._next.process_frame(frame, direction)
elif direction == FrameDirection.UPSTREAM and self._prev:
logger.trace(f"Pushing {frame} upstream from {self} to {self._prev}")
await self._prev.process_frame(frame, direction)
except Exception as e:
logger.exception(f"Uncaught exception in {self}: {e}")
def __str__(self):
return self.name

Some files were not shown because too many files have changed in this diff Show More