Compare commits

...

443 Commits

Author SHA1 Message Date
Mark Backman
6531858970 Revert LLMRunFrame for quickstart 2025-09-01 20:48:47 -04:00
Mark Backman
64486ef50b Merge pull request #2536 from gladiaio/PLA-38-missing-config-parameters
Gladia - add missing config parameters
2025-09-01 12:42:10 -07:00
Fabrice Lamant
802c5d04f4 update changelog 2025-09-01 10:21:11 +02:00
Aleix Conchillo Flaqué
83b90da53a Merge pull request #2537 from pipecat-ai/aleix/pipeline-task-cleanup-observers
PipelineTask: cleanup observers
2025-08-31 13:44:38 -07:00
Aleix Conchillo Flaqué
1f49de5cdf Merge pull request #2542 from pipecat-ai/aleix/remove-stop-interruption-frame
frames: remove StopInterruptionFrame
2025-08-31 13:44:22 -07:00
Mark Backman
7cf099eae7 Merge pull request #2541 from parshvadaftari/user/parshva/update_mem0
Update mem0 integration
2025-08-30 05:11:31 -07:00
Mark Backman
93a8ea3cb2 Merge pull request #2543 from pipecat-ai/mb/docs-extensions
Add Extensions to ref docs generation
2025-08-30 04:20:03 -07:00
Aleix Conchillo Flaqué
776aafddfb Merge pull request #2534 from pipecat-ai/aleix/pyright-1.1.404
pyproject: update pyright and ruff
2025-08-29 19:55:54 -07:00
Mark Backman
d56762262a Fix docs build errors 2025-08-29 20:24:35 -04:00
Mark Backman
bbcf35d657 Add Extensions to reference docs generation 2025-08-29 20:17:34 -04:00
Mark Backman
972546b24f Add IVR navigation (#2529) 2025-08-29 20:08:17 -04:00
Aleix Conchillo Flaqué
8b351f5bec pyproject: update pyright and ruff 2025-08-29 17:02:13 -07:00
Aleix Conchillo Flaqué
bd7d9346b7 frames: remove StopInterruptionFrame 2025-08-29 16:40:01 -07:00
Aleix Conchillo Flaqué
81325be4f3 Merge pull request #2540 from pipecat-ai/aleix/dtmf-tones-slower
audio(dtmf): use longer tones and longer gaps
2025-08-29 15:15:01 -07:00
Aleix Conchillo Flaqué
399f8de6ef audio(dtmf): use longer tones and longer gaps 2025-08-29 15:10:20 -07:00
parshvadaftari
60c070e077 update mem0 integration for reduced latency and better performance 2025-08-30 02:27:36 +05:30
Aleix Conchillo Flaqué
b5a644dd6f PipelineTask: cleanup observers 2025-08-29 10:54:36 -07:00
Fabrice Lamant
25b595e125 add suggestions 2025-08-29 14:51:20 +02:00
Fabrice Lamant
edc8cc1e69 remove sample_rate from GladiaInputParams 2025-08-29 14:00:00 +02:00
Fabrice Lamant
633dd69dee feat: add logging for pipecat version and session url 2025-08-29 13:47:16 +02:00
Fabrice Lamant
1a1d5a1081 feat: add missing config params 2025-08-29 13:46:44 +02:00
Aleix Conchillo Flaqué
c1b8d2acab Merge pull request #2532 from pipecat-ai/aleix/universal-dtmf-support
Universal DTMF support
2025-08-28 21:04:13 -07:00
Aleix Conchillo Flaqué
ea368e4c5f scripts(dtmf): added generate_dtmf.sh to generate DTMF wav files 2025-08-28 21:01:41 -07:00
Aleix Conchillo Flaqué
f03deb6ecc DailyTransport: remove send_dtmf() and write_dtmf() 2025-08-28 21:01:41 -07:00
Aleix Conchillo Flaqué
0e01ac8ef6 BaseOutputTransport: implement generic write_dtmf() 2025-08-28 21:01:41 -07:00
Aleix Conchillo Flaqué
5787743ab3 audio(dtmf): added DTMF audio files and load_dtmf_audio() 2025-08-28 21:01:41 -07:00
Aleix Conchillo Flaqué
79be0695dd make sure warnings are always displayed 2025-08-28 17:43:29 -07:00
Aleix Conchillo Flaqué
a5c5e069ba move pipecat.frames.frames.KeypadEntry to pipecat.audio.dtmf.types.KeypadEntry 2025-08-28 17:43:29 -07:00
Aleix Conchillo Flaqué
77c34076f7 Merge pull request #2531 from pipecat-ai/aleix/pipecat-0.0.82
update CHANGELOG for 0.0.82
2025-08-28 13:04:41 -07:00
Aleix Conchillo Flaqué
d67cece356 update CHANGELOG for 0.0.82 2025-08-28 13:02:47 -07:00
Aleix Conchillo Flaqué
275c8b59c5 MistralLLMService: fix build_chat_completion_params() 2025-08-28 12:04:14 -07:00
Aleix Conchillo Flaqué
5ebcea2a3b scripts(eval): change "result" function call parameter 2025-08-28 11:38:59 -07:00
Aleix Conchillo Flaqué
64f2135ddc examples(14f): use default models 2025-08-28 11:38:59 -07:00
kompfner
a74231f036 Merge pull request #2515 from pipecat-ai/pk/llm-run-frame
Add `LLMRunFrame` to trigger an LLM response, replacing `context_aggr…
2025-08-28 10:01:00 -04:00
Paul Kompfner
189749b579 Add LLMRunFrame to trigger an LLM response, replacing context_aggregator.user().get_context_frame() 2025-08-28 09:53:33 -04:00
Aleix Conchillo Flaqué
e384ca949e Merge pull request #2512 from pipecat-ai/aleix/textframe-skip-tts
TextFrame: add skip_tts field
2025-08-27 16:26:03 -07:00
Aleix Conchillo Flaqué
eb248fedc1 add skip_tts to LLMFullResponseStartFrame/LLMFullResponseEndFrame 2025-08-27 16:23:27 -07:00
Aleix Conchillo Flaqué
16f57be72c LLMConfigureOutputFrame: allow configuring LLM output 2025-08-27 16:23:27 -07:00
Aleix Conchillo Flaqué
5803936838 TextFrame: add skip_tts field
This lets a text frame bypass TTS while still being included in the LLM
context. Useful for cases like structured text that isn’t meant to be spoken but
should still contribute to context.
2025-08-27 16:23:27 -07:00
Aleix Conchillo Flaqué
d9837dd1e5 Merge pull request #2527 from pipecat-ai/aleix/daily-python-0.19.8
pyproject: update daily-python to 0.19.8
2025-08-27 16:22:49 -07:00
Aleix Conchillo Flaqué
e48c9fc3e2 pyproject: update daily-python to 0.19.8 2025-08-27 16:00:36 -07:00
Aleix Conchillo Flaqué
3c4454a33e Merge pull request #2526 from pipecat-ai/aleix/pipeline-task-wait-for-startframe
PipelineTask: wait for StartFrame to reach end of pipeline
2025-08-27 15:57:10 -07:00
Aleix Conchillo Flaqué
2a0780e6ef PipelineTask: wait for StartFrame to reach end of pipeline
Fixes #2498
2025-08-27 14:23:09 -07:00
Aleix Conchillo Flaqué
5e121346fb Merge pull request #2516 from pipecat-ai/aleix/rtvi-client-version-check
RTVIProcessor: make sure client version is set
2025-08-27 14:02:14 -07:00
Aleix Conchillo Flaqué
2bdca8d22c RTVIProcessor: make sure client version is set 2025-08-27 13:36:11 -07:00
Aleix Conchillo Flaqué
1f5888bcf7 Merge pull request #2517 from pipecat-ai/aleix/unify-get-messages-for-logging
unify get_messages_for_logging()
2025-08-27 12:49:36 -07:00
Mark Backman
3d09f9a2af Merge pull request #2524 from pipecat-ai/mb/cartesia-speed
Cartesia: update speed InputParam
2025-08-27 12:47:29 -07:00
Aleix Conchillo Flaqué
cd3563bb16 unify get_messages_for_logging()
Some implementations were returning a list and some were returning a JSON
string. They should all return a list, and the user can decide whether to
transform that into JSON.
2025-08-27 12:45:24 -07:00
Aleix Conchillo Flaqué
3e79ef4118 Merge pull request #2525 from pipecat-ai/aleix/daily-fix-send-dtmf
DailyTransport: fix sending DTMF tones
2025-08-27 12:44:27 -07:00
Aleix Conchillo Flaqué
2613da1a1f PipelineTask: increase CANCEL_TIMEOUT_SECS to 20 2025-08-27 11:50:48 -07:00
Aleix Conchillo Flaqué
41d40f9a11 DailyTransport: make sure we have a client before joining/leaving 2025-08-27 11:50:48 -07:00
Aleix Conchillo Flaqué
74af2b6aa4 DailyTransport: fix sending DTMF tones 2025-08-27 11:50:48 -07:00
Mark Backman
f7d9f32b0f Cartesia: update speed InputParam 2025-08-27 13:34:28 -04:00
Mark Backman
6074af60ef Merge pull request #2521 from pipecat-ai/mb/update-quickstart-pcc-docker
Update quickstart to use pcc docker command
2025-08-27 08:13:31 -07:00
Mark Backman
7ef6893c0d Merge pull request #2523 from sam-s10s/fix/connection-none
Speechmatics TTS connection issue
2025-08-27 08:09:46 -07:00
Sam Sykes
cc5557e051 changelog 2025-08-27 16:07:31 +01:00
Sam Sykes
06f7a92c99 fix to finally statement 2025-08-27 14:43:07 +01:00
Mark Backman
61a333ccae Update quickstart to use pcc docker command 2025-08-26 21:29:13 -04:00
Mark Backman
fc3d84dff7 Merge pull request #2501 from pipecat-ai/mb/aws-tts-more-flexible-auth
Support additional authentication mechanisms for AWS services
2025-08-26 18:05:37 -07:00
Mark Backman
86a37d8cea Add changelog entry for SentryMetrics missing import fix 2025-08-26 21:00:16 -04:00
Mark Backman
3f66acf9f1 Merge pull request #2520 from geluso/bugfix-missing-asyncio-import
add missing import asyncio
2025-08-26 17:59:25 -07:00
Mark Backman
facfaa2dd4 AWSBedrockLLMService: Allow setting auth credentials via env vars 2025-08-26 20:59:12 -04:00
Mark Backman
8250c381d1 AWSPollyTTSService: allow setting auth credentials through provider chain 2025-08-26 20:58:02 -04:00
Steve Geluso
32f9e48865 add missing import asyncio 2025-08-26 17:40:11 -07:00
Filipi Fuchter
76eef837b6 Removing watchdog from SarvamTTSService. 2025-08-26 18:44:58 -03:00
Filipi Fuchter
c9aaa463b7 Mentioning the recent SarvamTTSService changes in the changelog. 2025-08-26 18:44:58 -03:00
pratham-sarvam
6d582e41b7 Added Sarvam TTS Websocket Implementation (#2356)
* Added Sarvam TTS Websocket Implementation

* Addressed some of the comments on PR

* added change voice logic

* added changes from main

* pushing text frames and added flush audio

* updated docs string for better docs

* Addressed comments and added some improvements

* pushed optional args down

* removed new line

* made aiohttp session mandatory in http service

* added push frame and removed unused function

* removed pong message

* added disconnecting logic

---------

Co-authored-by: vinayak-sarvam <vinayak@sarvam.ai>
2025-08-26 18:10:26 -03:00
kompfner
ca29f62bff Merge pull request #2510 from pipecat-ai/pk/fix-set-tools-types
Update types for tools in `LLMSetToolsFrame` and `LLMContextAggregato…
2025-08-26 14:12:21 -04:00
Aleix Conchillo Flaqué
0dced68c3c Merge pull request #2511 from pipecat-ai/aleix/end-of-pipline-warning
PipelineTask: warn if CancelFrame doesn't reach the end
2025-08-26 11:02:26 -07:00
Aleix Conchillo Flaqué
8ab81d289a PipelineTask: warn if CancelFrame doesn't reach the end 2025-08-26 10:36:33 -07:00
Paul Kompfner
f457d00760 Update types for tools in LLMSetToolsFrame and LLMContextAggregator.set_tools(), for two reasons:
1. `ToolsSchema` has been supported in `LLMSetToolsFrame` for a while but wasn't properly reflected in these type hints
2. The new universal `LLMContext` expects tools to be either a `ToolsSchema` or `NOT_GIVEN`.
2025-08-26 11:32:21 -04:00
kompfner
f5118c4412 Merge pull request #2440 from pipecat-ai/pk/prototype-llm-failover-attempt-4
Support for runtime LLM switching
2025-08-26 09:55:03 -04:00
Paul Kompfner
a79fe40162 Fix a typo in the CHANGELOG 2025-08-26 09:51:48 -04:00
Paul Kompfner
dcb4949e20 Move ServiceSwitcherFrame and ManuallySwitchServiceFrame to frames.py 2025-08-26 09:47:37 -04:00
Paul Kompfner
8b543e558d Add CHANGELOG entry describing LLMService.run_inference() 2025-08-26 09:47:32 -04:00
Paul Kompfner
8181962236 Add CHANGELOG entry describing LLM switcher 2025-08-26 09:46:51 -04:00
Paul Kompfner
98dc891640 Move CHANGELOG log entry from 0.0.81 to Unreleased 2025-08-26 09:45:49 -04:00
Paul Kompfner
71de0da570 ServiceSwitchers are now controlled using frames rather than with direct method calls 2025-08-26 09:44:15 -04:00
Paul Kompfner
b40c8bb81d Refactor LLMSwitcher into a base ServiceSwitcher and an LLMSwitcher that subclasses it 2025-08-26 09:44:15 -04:00
Paul Kompfner
43f1b59b86 Convert LLM generate_summary() methods to the more generic run_inference() 2025-08-26 09:44:15 -04:00
Paul Kompfner
a0a2bb3aa4 In GeminiLLMAdapter, when translating from the universal LLMContext format, only pull out the first "system" message as the system instruction, and convert subsequent ones into "user" messages. This is a more correct thing to do than simply drop subsequent "system" messages, especially when potentially sharing a context between multiple LLMs. 2025-08-26 09:44:15 -04:00
Paul Kompfner
04a50df3d5 Add LLMSwitcher, with LLMSwitcherStrategyManual as the first supported switching strategy 2025-08-26 09:44:15 -04:00
Paul Kompfner
8c0edffaff Fix bug in AWS Bedrock conversation summarization. It was using an out-of-date pattern (the _client property no longer exists) 2025-08-26 09:44:15 -04:00
Paul Kompfner
fe6063fdbe Introduce an affordance to LLMService for generating a summary of a conversation directly (i.e. without going through the pipeline).
This abstraction will allow us to update Pipecat Flows to avoid reaching into LLM service internals to generate summaries.

In addition to being a helpful refactor to remove a fragile part of Pipecat Flows, this change helps set the stage for supporting the upcoming `LLMSwitcher`, where the “active” LLM will only be determined at runtime—today, Pipecat Flows needs to know ahead of time what type of LLM it’s working with, to load an LLM-specific “adapter” that does the work of generating summaries, among other things.
2025-08-26 09:44:15 -04:00
Paul Kompfner
195146adb2 Bump deprecation warning version, as this commit is not expected to ship until version 0.0.82. 2025-08-26 09:44:15 -04:00
Paul Kompfner
cab9e18cc9 Port recent change to LLMAssistantContextAggregator to universal LLMAssistantAggregator 2025-08-26 09:44:15 -04:00
Paul Kompfner
baef688e4e Port recent changes to LLMUserContextAggregator to universal LLMUserAggregator 2025-08-26 09:44:15 -04:00
Paul Kompfner
f1f43fe500 After a rebase, rename foundational examples showing usage of universal context to avoid naming conflict with a recently-added example. 2025-08-26 09:44:15 -04:00
Paul Kompfner
73b63f8d35 Remove unnecessary import 2025-08-26 09:44:15 -04:00
Paul Kompfner
0c14b33e92 Deprecate GoogleLLMOpenAIBetaService 2025-08-26 09:44:15 -04:00
Paul Kompfner
09beaccaf0 Assorted minor improvements after code review 2025-08-26 09:44:15 -04:00
Paul Kompfner
40557a1aae Remove TODO comment 2025-08-26 09:44:15 -04:00
Paul Kompfner
ecc4cc4a79 Add support for universal LLMContext to RTVIObserver 2025-08-26 09:44:15 -04:00
Paul Kompfner
37be8805f4 ruff 2025-08-26 09:44:15 -04:00
Paul Kompfner
93c7e64995 Add missing PERPLEXITY_API_KEY in env.example 2025-08-26 09:44:15 -04:00
Paul Kompfner
9de2bd61a9 Add supports_universal_context for OpenAILLMService subclasses so that we can gradually roll out support for universal LLMContext in a controlled manner.
Also update `get_chat_completions()` implementations with the new argument type.
2025-08-26 09:44:15 -04:00
Paul Kompfner
566af71862 Add CHANGELOG entry for the universal LLMContext machinery 2025-08-26 09:44:15 -04:00
Paul Kompfner
12064bd6e6 Add a bit of helpful info in an error message 2025-08-26 09:44:15 -04:00
Paul Kompfner
a962459151 Change LLMContextAggregatorPair.create(context) to LLMContextAggregatorPair(context) 2025-08-26 09:44:15 -04:00
Paul Kompfner
8fc76a29bc Raise errors when trying to use universal LLMContext with LLM services that don't yet support it 2025-08-26 09:44:15 -04:00
Paul Kompfner
e3019261a5 Fix classes that subclass BaseLLMAdapter by adding placeholder stuff until support for universal LLMContext machinery comes to all LLM services 2025-08-26 09:44:15 -04:00
Paul Kompfner
fa1f6f1c51 In LLMContext, normalize an empty provided ToolsSchema to NOT_GIVEN 2025-08-26 09:44:15 -04:00
Paul Kompfner
337f00c16c Minor fix: add a type annotation 2025-08-26 09:44:15 -04:00
Paul Kompfner
d50922cdcd Update Google adapter to handle possibility of system message in standard format being provided as a list of text parts rather than just a string. 2025-08-26 09:44:15 -04:00
Paul Kompfner
47f5ca6265 Update Gemini adapter to be able to handle LLMSpecificMessages containing Google-formatted messages 2025-08-26 09:44:15 -04:00
Paul Kompfner
2eddb6ffda [WIP] Universal (LLM-agnostic) context machinery to support runtime LLM switching.
- Remove outdated comment
2025-08-26 09:44:15 -04:00
Paul Kompfner
560a6f2247 [WIP] Universal (LLM-agnostic) context machinery to support runtime LLM switching.
- Make `LLMContext.add_audio_frames_message()` respect the OpenAI standard format
2025-08-26 09:44:15 -04:00
Paul Kompfner
59ecb19000 [WIP] Universal (LLM-agnostic) context machinery to support runtime LLM switching.
- Add support for LLM-specific messages in the universal `LLMContext`, to enable using LLM-specific functionality while still using the universal LLM context
2025-08-26 09:44:15 -04:00
Paul Kompfner
cfb094b3c8 [WIP] Universal (LLM-agnostic) context machinery to support runtime LLM switching.
- Make it so that tools in `LLMContext` are guaranteed to be either a `ToolsSchema` or `NOT_GIVEN`
2025-08-26 09:44:15 -04:00
Paul Kompfner
1f7e8e001b [WIP] Universal (LLM-agnostic) context machinery to support runtime LLM switching.
- Update some types to also allow for universal `LLMContext`
2025-08-26 09:44:15 -04:00
Paul Kompfner
688b136141 [WIP] Universal (LLM-agnostic) context machinery to support runtime LLM switching.
- Add to Google LLM service support for universal LLM context
2025-08-26 09:44:15 -04:00
Paul Kompfner
809c4c1bc5 [WIP] Universal (LLM-agnostic) context machinery to support runtime LLM switching.
- Add to OpenAI LLM service support for universal LLM context
2025-08-26 09:44:15 -04:00
Paul Kompfner
81ca5e6601 [WIP] Universal (LLM-agnostic) context machinery to support runtime LLM switching.
- Formatting fix + dead import cleanup
2025-08-26 09:44:15 -04:00
Paul Kompfner
ebc49d2252 [WIP] Universal (LLM-agnostic) context machinery to support runtime LLM switching.
- Add a "universal" alias for `OpenAILLMContextAssistantTimestampFrame`: `LLMContextAssistantTimestampFrame`
2025-08-26 09:44:15 -04:00
Paul Kompfner
ff8d158e18 [WIP] Universal (LLM-agnostic) context machinery to support runtime LLM switching.
- Added universal `LLMContext` and associated context aggregators.
2025-08-26 09:44:15 -04:00
Aleix Conchillo Flaqué
37980b0854 Merge pull request #2504 from pipecat-ai/aleix/cartesia-fix-timeout-reconnection
CartesiaTTSService: reconnect on Cartesia's timeout
2025-08-25 15:24:31 -07:00
Aleix Conchillo Flaqué
39ebc2c9c1 CartesiaTTSService: reconnect on Cartesia's timeout 2025-08-25 14:09:03 -07:00
Aleix Conchillo Flaqué
ab61d09ec1 Merge pull request #2502 from pipecat-ai/aleix/pipecat-0.0.81
update CHANGELOG for 0.0.81
2025-08-25 09:28:21 -07:00
Aleix Conchillo Flaqué
e4afc0a13c update CHANGELOG for 0.0.81 2025-08-25 08:22:28 -07:00
Mark Backman
dde3d2395b Merge pull request #2491 from pipecat-ai/mb/update-quickstart 2025-08-23 06:34:37 -07:00
Aleix Conchillo Flaqué
30b36c3d6e Merge pull request #2497 from pipecat-ai/aleix/pipeline-task-fix-cancellation
PipelineTask: handle cancellations gracefully
2025-08-22 22:37:12 -07:00
Mark Backman
de4dfc3ed4 Update deployment steps 2025-08-23 00:19:26 -04:00
Aleix Conchillo Flaqué
a0128516ff PipelineTask: handle cancellations gracefully 2025-08-22 19:04:31 -07:00
Aleix Conchillo Flaqué
db3b8c7325 Merge pull request #2496 from pipecat-ai/aleix/release-evals-always-provide-eval-prompt
scripts(evals): always require an eval prompt
2025-08-22 18:11:33 -07:00
Aleix Conchillo Flaqué
9273ec0f25 scripts(evals): always require an eval prompt 2025-08-22 16:57:47 -07:00
Mark Backman
8dfa1187be Merge pull request #2402 from pipecat-ai/mb/voicemail-detection
Add voicemail detection
2025-08-22 14:51:13 -07:00
Mark Backman
e17fd580c6 Update README 2025-08-22 15:56:56 -04:00
mattie ruth backman
3e3d50a855 Fix issue with request images from the camera introduced in smallwebrtctransport 2025-08-22 15:02:33 -04:00
Mark Backman
402661ae03 Prevent user speaking frames from entering the classifier branch after a conversation is detected 2025-08-22 14:09:45 -04:00
Mark Backman
69c6a95b8a Simplify frames in the NotifierGate 2025-08-22 14:09:45 -04:00
Mark Backman
4d49210a73 Rename system_prompt to custom_system_prompt; improve dev ex for classification prompt requirements 2025-08-22 14:09:45 -04:00
Aleix Conchillo Flaqué
5f8a22ef2f Merge pull request #2493 from pipecat-ai/aleix/runner-task-asyncio-cancellation
PipelineRunner/PipelineTask: fix asyncio task cancellation
2025-08-22 09:13:58 -07:00
Aleix Conchillo Flaqué
606ad0826a Merge pull request #2492 from pipecat-ai/aleix/wait-for-task-deprecated
FrameProcessor: wait_for_task is now deprecated
2025-08-22 09:13:34 -07:00
Mark Backman
57028255ee Update changelog, mention text LLMs only 2025-08-22 12:12:17 -04:00
Mark Backman
87ebbab758 Only set/clear voicemail_event when voicemail is detected 2025-08-22 12:12:17 -04:00
Mark Backman
bd401e8d6f Rename TTSBuffer to TTSGate 2025-08-22 12:12:17 -04:00
Mark Backman
f0dfab23e7 Cleanup 2025-08-22 12:12:17 -04:00
Mark Backman
fbc907c371 Change path to extensions 2025-08-22 12:12:17 -04:00
Mark Backman
40b5ef485d Add base NotifierGate class and ClassifierGate, ConversationGate subclasses 2025-08-22 12:12:17 -04:00
Mark Backman
b30af3e155 Tests specify USER_SPEAKS_FIRST or BOT_SPEAKS_FIRST 2025-08-22 12:12:17 -04:00
Mark Backman
446bb5cddf Refactor callback to event 2025-08-22 12:12:17 -04:00
Mark Backman
1c1ee94074 Add 44 to evals, update evals to support user speaking first 2025-08-22 12:12:17 -04:00
Mark Backman
ac30083b45 Add CHANGELOG entry 2025-08-22 12:12:17 -04:00
Mark Backman
ce579d4266 Make on_voicemail_detected callback required, cleanup logging 2025-08-22 12:12:17 -04:00
Mark Backman
5a07b30c7a Class name changes, add TTSStarted/StoppedFrame to the TTSBuffer 2025-08-22 12:12:17 -04:00
Mark Backman
9da33f3897 Handle multiple user inputs from the user when a voicemail is detected; add a configurable timeout to emitting the callback 2025-08-22 12:12:17 -04:00
Mark Backman
5ca82ec61e Final docstrings, comments, and cleanup 2025-08-22 12:12:17 -04:00
Mark Backman
0067c7df47 Add aggregation to classifier LLM output and validate prompt 2025-08-22 12:12:17 -04:00
Mark Backman
ab03db5b0c Updated prompt, add custom system_prompt input 2025-08-22 12:12:17 -04:00
Mark Backman
238d6bf9ab Add buffering logic 2025-08-22 12:12:17 -04:00
Mark Backman
90ae85bab2 More updates—added new voicemail module 2025-08-22 12:12:17 -04:00
Mark Backman
29e09b2053 POC demo in progress 2025-08-22 12:12:17 -04:00
mattie ruth backman
bad9977e8c PR feedback and more explicit about only supporting exporting 1 video 2025-08-22 11:24:22 -04:00
mattie ruth backman
b987579d54 update smallWebRTC screen support to support the utils format for listening to screenshares 2025-08-22 11:24:22 -04:00
mattie ruth backman
40f1f4ff11 Add support to smallWebRTCTransport for receiving screenshare videos 2025-08-22 11:24:22 -04:00
Aleix Conchillo Flaqué
a3ad31d0f6 README: recommended python version is 3.12 2025-08-21 23:50:00 -07:00
Aleix Conchillo Flaqué
8044c4170d PipelineRunner/PipelineTask: fix asyncio task cancellation 2025-08-21 23:50:00 -07:00
Aleix Conchillo Flaqué
bc51e7abc6 FrameProcessor: wait_for_task is now deprecated 2025-08-21 21:17:47 -07:00
Aleix Conchillo Flaqué
256ecf4d71 Merge pull request #2490 from pipecat-ai/aleix/speechmatics-exceptions
Speechmatics exception handling
2025-08-21 19:48:43 -07:00
Aleix Conchillo Flaqué
c16969c4f5 Merge pull request #2489 from pipecat-ai/aleix/daily-python-0.19.7
pyproject: update daily-python to 0.19.7
2025-08-21 19:48:31 -07:00
Mark Backman
8ef64d8c8d Update quickstart, make it deployable 2025-08-21 22:32:34 -04:00
Aleix Conchillo Flaqué
4947d08733 GladiaSTTService: update logging levels 2025-08-21 18:42:23 -07:00
Aleix Conchillo Flaqué
b61846534d SpeechmaticsSTTService: improve exception handling and logging 2025-08-21 18:42:23 -07:00
Aleix Conchillo Flaqué
8f01cd220a pyproject: update daily-python to 0.19.7 2025-08-21 18:40:01 -07:00
Aleix Conchillo Flaqué
3abaaf80e0 Merge pull request #2487 from pipecat-ai/aleix/watchdog-timers-removal
remove watchdog timers and specific asyncio implementations
2025-08-21 18:37:35 -07:00
Aleix Conchillo Flaqué
13890fa021 github(tests): use python 3.12 to run unit tests/coverage 2025-08-21 18:09:56 -07:00
Aleix Conchillo Flaqué
802af28888 update pytest-asyncio to 1.1.0 2025-08-21 18:09:56 -07:00
Aleix Conchillo Flaqué
24a628c85e remove watchdog timers and specific asyncio implementations
Watchdog timers have been removed. They were introduced in 0.0.72 to help
diagnose pipeline freezes. Unfortunately, they proved ineffective since they
required developers to use Pipecat-specific queues, iterators, and events to
correctly reset the timer, which limited their usefulness and added friction.
2025-08-21 18:09:56 -07:00
Mark Backman
ddab95835b Merge pull request #2474 from pipecat-ai/mb/add-frames-pipeline-idle
Add UserStarted/StoppedSpeakingFrames to idle_timeout_frames
2025-08-21 03:45:46 -07:00
Mark Backman
cb13f4b4cb Add user speaking and transcription frames to idle_timeout_frames 2025-08-21 06:43:10 -04:00
Aleix Conchillo Flaqué
4793277d34 Merge pull request #2480 from pipecat-ai/aleix/replace-asyncio-waitfor
replace asyncio.wait_for for wait_for2.wait_for
2025-08-20 17:43:32 -07:00
Aleix Conchillo Flaqué
28c729cc36 replace asyncio.wait_for for wait_for2.wait_for 2025-08-20 15:26:57 -07:00
Aleix Conchillo Flaqué
4d07c7b77c Merge pull request #2479 from pipecat-ai/aleix/simplify-dtmf-aggregator
DTMFAggregator: no need for interruption task
2025-08-20 15:15:35 -07:00
Aleix Conchillo Flaqué
4ff0567025 BaseObject: allow keyword arguments 2025-08-20 15:14:31 -07:00
Aleix Conchillo Flaqué
1377dec01b DTMFAggregator: no need for interruption task
Now that system frames are queued there's no need to have an additional task to
push a `BotInterruptionFrame`.
2025-08-20 14:35:04 -07:00
Aleix Conchillo Flaqué
42f4d73a63 Merge pull request #2478 from pipecat-ai/aleix/fix-wait-for2-import
timeout: fix wait_for2 import
2025-08-20 14:29:19 -07:00
Aleix Conchillo Flaqué
f1c1ebf852 timeout: fix wait_for2 import 2025-08-20 14:24:16 -07:00
Aleix Conchillo Flaqué
eb6d43f6cb Merge pull request #2476 from pipecat-ai/aleix/add-asyncio-timeout
implement custom asyncio.wait_for()
2025-08-20 14:20:22 -07:00
Aleix Conchillo Flaqué
f387776985 add custom asyncio.wait_for()
This patch uses `wait_for2` package to implement `asyncio.wait_for()` for
Python < 3.12.

In Python 3.12, `asyncio.wait_for()` is implemented in terms of
`asyncio.timeout()` which fixed a bunch of issues. However, this was never
backported (because of the lack of `asyncio.timeout()`) and there are still many
remaining issues, especially in Python 3.10, in `asyncio.wait_for()`.

See https://github.com/python/cpython/pull/98518
2025-08-20 14:09:05 -07:00
Aleix Conchillo Flaqué
5286591826 Merge pull request #2464 from pipecat-ai/aleix/frame-processor-updates
various frame processor updates
2025-08-20 10:11:49 -07:00
Aleix Conchillo Flaqué
6831e63ec9 PipelineTask: use PipelineSource/PipelineSink and remove tasks 2025-08-20 10:08:54 -07:00
Aleix Conchillo Flaqué
12bcb7db64 ParallelPipeline: use PipelineSource/PipelineSink and remove tasks 2025-08-20 10:08:54 -07:00
Aleix Conchillo Flaqué
1b48b1d860 Pipeline: allow passing user source and sink processors 2025-08-20 10:08:54 -07:00
Aleix Conchillo Flaqué
d161e2767f FrameProcessor: allow pausing/resuming system frames 2025-08-20 10:08:54 -07:00
Aleix Conchillo Flaqué
4e3af00b6d tests: try to use default SleepFrame time 2025-08-20 10:08:54 -07:00
Aleix Conchillo Flaqué
4015aedb86 tests: fix unit tests 2025-08-20 10:08:54 -07:00
Aleix Conchillo Flaqué
75a6ee839b BaseObserver: added new on_process_frame 2025-08-20 10:08:54 -07:00
Aleix Conchillo Flaqué
13ce02c896 FrameProcessor: add new entry_processors() method 2025-08-20 10:08:54 -07:00
Aleix Conchillo Flaqué
2fd5885dc3 pipeline: implement processors property 2025-08-20 07:40:21 -07:00
Aleix Conchillo Flaqué
d743586bfb BasePipeline: move processors_with_metrics() to FrameProcessor 2025-08-20 07:40:21 -07:00
Aleix Conchillo Flaqué
8051017895 pipeline: wrap with pipelines, use direct mode and reduce tasks 2025-08-20 07:40:21 -07:00
Aleix Conchillo Flaqué
dc7bf98ce5 Pipeline: improve performance by using direct mode 2025-08-20 07:40:21 -07:00
Aleix Conchillo Flaqué
609a43a191 FrameProcessor: added processors/next/previous properties 2025-08-20 07:40:19 -07:00
Aleix Conchillo Flaqué
4fb04422d9 FrameProcessor: remove unused set_parent/get_parent 2025-08-20 07:40:02 -07:00
Mark Backman
2f74a7e674 Merge pull request #2469 from pipecat-ai/mb/11labs-text-normalization
Add apply_text_normalization to ElevenLabs TTS services
2025-08-19 18:21:33 -07:00
Mark Backman
5205f56087 Add apply_text_normalization to ElevenLabs TTS services 2025-08-19 21:19:00 -04:00
Mark Backman
694c792af3 Merge pull request #2470 from pipecat-ai/mb/11labs-settings-reconnect
Update ElevenLabsTTSService: update runtime configuration
2025-08-19 18:18:14 -07:00
Mark Backman
406e82a842 Merge pull request #2438 from pipecat-ai/mb/delete-old-docs
Remove stale docs
2025-08-19 12:22:54 -07:00
Mark Backman
837de5f893 Merge pull request #2468 from pipecat-ai/mb/fix-mistral-docs-errors
Fix Mistral docstrings build errors
2025-08-19 12:22:26 -07:00
Mark Backman
10b9b1da2f Merge pull request #2471 from pipecat-ai/mb/add-13j
Add foundational 13j for Azure STT
2025-08-19 12:10:03 -07:00
Mark Backman
7854a2ec83 Add foundational 13j for Azure STT 2025-08-19 14:36:31 -04:00
Mark Backman
ac7c69078f Merge pull request #2442 from pipecat-ai/mb/retry-completion
retry_on_timeout: Anthropic, AWS Bedrock
2025-08-19 11:23:43 -07:00
Mark Backman
c9b4356ea6 Update changelog 2025-08-19 14:21:18 -04:00
Mark Backman
b3e4421191 Add retry_on_timeout to AWSBedrockLLMService 2025-08-19 14:20:35 -04:00
Mark Backman
84058c3948 Add retry_on_timeout to AnthropicLLMService 2025-08-19 14:20:35 -04:00
Mark Backman
aebc781419 Update ElevenLabsTTSService to update when voice_settings change 2025-08-19 13:51:10 -04:00
Mark Backman
4160446f4c Update ElevenLabsTTSService: reconnect on model and language changes 2025-08-19 11:32:54 -04:00
Mark Backman
05a14af184 Fix Mistral docstrings build errors 2025-08-19 10:31:03 -04:00
Filipi da Silva Fuchter
89d2ef2bde Merge pull request #2465 from pipecat-ai/filipi/heygen_changing_log_level
Changing heygen log level to trace.
2025-08-19 07:50:11 -03:00
Filipi Fuchter
f550015efb Changing heygen log level to trace. 2025-08-18 18:00:25 -03:00
Mark Backman
8fa44863fb Merge pull request #2455 from pipecat-ai/vp-log-line
log: add Disconnected from ElevenLabs debug log
2025-08-15 14:12:28 -07:00
vipyne
088cb56922 log: add Disconnected from ElevenLabs debug log 2025-08-15 15:05:07 -05:00
Aleix Conchillo Flaqué
a789e5feea Merge pull request #2451 from pipecat-ai/aleix/audio-buffer-processor-overlap
AudioBufferProcessor: fix overlap when buffer size is set
2025-08-14 15:31:50 -07:00
Aleix Conchillo Flaqué
16ca44131c Merge pull request #2452 from pipecat-ai/aleix/runner-daily-direct-handlesigint
Runner: set handle_sigint to True for Daily direct
2025-08-14 15:25:05 -07:00
Mark Backman
418860cf26 Merge pull request #2450 from pipecat-ai/mb/fix-openai-changelog-entry
fix: Move OpenAI retry changelog entry to the correct release
2025-08-14 15:23:00 -07:00
Aleix Conchillo Flaqué
e2fc8b3dce Runner: set handle_sigint to True for Daily direct 2025-08-14 14:55:52 -07:00
Aleix Conchillo Flaqué
8b641089f8 AudioBufferProcessor: fix overlap when buffer size is set 2025-08-14 14:44:08 -07:00
Mark Backman
d36ed755ce fix: Move OpenAI retry changelog entry to the correct release 2025-08-14 17:34:35 -04:00
Mark Backman
7aaf64fe55 Merge pull request #2447 from pipecat-ai/mb/update-foundational-readme
Improve the foundational example README
2025-08-14 09:51:01 -07:00
Mark Backman
5f52008974 Improve the foundational example README 2025-08-14 11:29:04 -04:00
Mark Backman
d520677b23 Merge pull request #2408 from pipecat-ai/mb/add-mistral-llm
Add MistralLLMService
2025-08-14 08:19:18 -07:00
Mark Backman
42bd1e9d40 Add Mistral to README and pyproject.toml 2025-08-14 11:15:52 -04:00
Mark Backman
7f0494aa04 Override build_chat_completion_params for Mistral 2025-08-14 10:32:18 -04:00
Mark Backman
b7ae2989ac Add foundational 14w-function-calling.py 2025-08-14 10:00:46 -04:00
Mark Backman
2b2b0f8121 Add MistralLLMService 2025-08-14 09:57:14 -04:00
Mark Backman
5ca33a2b00 Merge pull request #2445 from pipecat-ai/mb/fix-changelog-asyncai
fix: Changelog for Async AI bugfix
2025-08-14 06:48:08 -07:00
Mark Backman
938dcb613d fix: Changelog for Async AI bugfix 2025-08-14 09:13:03 -04:00
Mark Backman
bc748cf9d0 Merge pull request #2444 from ashotbagh/fix/asyncai-force-flush
fix(asyncai): force flush WS TTS to eliminate stalls
2025-08-14 06:10:16 -07:00
Ashot
3b55d16a49 fix(asyncai): force flush WS TTS to eliminate stalls 2025-08-14 16:34:34 +04:00
Mark Backman
d7f31e0cbd Merge pull request #2387 from pipecat-ai/mb/retry-chat-completion
Retry chat completions for OpenAILLMService and its subclasses
2025-08-13 14:39:40 -07:00
Mark Backman
c662a2d820 Merge pull request #2437 from pipecat-ai/mb/19-english
Foundational 19: Respond in English
2025-08-13 11:57:24 -07:00
Mark Backman
2c220ca54e Remove stale docs 2025-08-13 14:11:41 -04:00
Mark Backman
89f0ff17c0 Merge pull request #2430 from pipecat-ai/aleix/pipecat-0.0.80
update CHANGELOG for 0.0.80
2025-08-13 09:41:43 -07:00
Mark Backman
b5465364fa Foundational 19: Respond in English 2025-08-13 12:37:13 -04:00
Aleix Conchillo Flaqué
c024eb7b8c update CHANGELOG for 0.0.80 2025-08-13 11:46:24 -04:00
Mark Backman
608570e89d Merge pull request #2433 from pipecat-ai/mb/openai-realtime-text-modality
fix: Add text support to OpenAIRealtimeBetaLLMService
2025-08-13 08:41:33 -07:00
Mark Backman
3ad61a8a04 Remove stray - in changelog 2025-08-13 11:39:59 -04:00
Mark Backman
4c4bae2db6 Remove unnecessary messages from 19 and 19b examples 2025-08-13 11:39:59 -04:00
Mark Backman
901b6b5913 Add foundational 19b 2025-08-13 11:37:38 -04:00
Mark Backman
71cd0f1c87 fix: Add text support to OpenAIRealtimeBetaLLMService 2025-08-13 11:37:36 -04:00
Filipi da Silva Fuchter
a2a419e6db Merge pull request #2435 from pipecat-ai/filipi/small_webrtc_end_pipeline
Fixed an issue where `SmallWebRTCTransport` ended before TTS finished.
2025-08-13 11:58:33 -03:00
Filipi Fuchter
bbbbdc459a Fixed an issue where SmallWebRTCTransport ended before TTS finished. 2025-08-13 11:46:51 -03:00
Mark Backman
d203528dad Merge pull request #2333 from yohan-altrium/fix/2277-azure-tts-ssml-reserved-characters
Fixes 2277 - SSML reserved characters causes Azure TTS to fail
2025-08-13 06:27:30 -07:00
Yohan Liyanage
4bcca7956e Refactors the code based on PR comments and adds the relevant changelog entry. 2025-08-13 16:34:33 +05:30
Aleix Conchillo Flaqué
68a4cf4c68 Merge pull request #2427 from pipecat-ai/aleix/base-watchdog-priority-queue
WatchdogPriorityQueue: this is now a base class
2025-08-12 18:25:59 -07:00
Aleix Conchillo Flaqué
0508ddddfb WatchdogPriorityQueue: fix watchdog sentinel insertion
We now force each inserted item in the priority queue to be a tuple and the
actual value to be last in the tuple. All the previous values in the tuple also
need to be numeric.
2025-08-12 17:40:58 -07:00
Mark Backman
8714c9137f Code review fixes 2025-08-12 17:49:13 -04:00
Mark Backman
4c029fcfa7 Update OpenAILLMService subclasses to use the new build_chat_completion_params function 2025-08-12 17:48:51 -04:00
Mark Backman
5c86f8e687 Add timeout/retry logic and refactor parameter building in BaseOpenAILLMService
- Add timeout (default 5.0s) and retry_on_timeout parameters to constructor
- Implement timeout/retry logic in get_chat_completions using asyncio.wait_for
- Extract build_chat_completion_params() as public method for subclass customization
2025-08-12 17:48:51 -04:00
Mark Backman
54a4d8a9f8 Merge pull request #2422 from thsunkid/thu/fix-set-lang-in-base-whisper
Fix: assigns string code instead of Language enum to BaseWhisperSTTService._language
2025-08-12 11:57:46 -07:00
Mark Backman
38af514d95 Merge pull request #2407 from pipecat-ai/mb/add-gemini-tts
Add GeminiTTSService
2025-08-12 11:56:45 -07:00
Aleix Conchillo Flaqué
6aa80c0b8e Merge pull request #2424 from pipecat-ai/aleix/system-frame-queues-fix
FrameProcessor: fix race condition on FrameProcessorQueue
2025-08-12 11:56:00 -07:00
Mark Backman
e720573e60 Added 07n-interruptible-gemini 2025-08-12 14:54:49 -04:00
Mark Backman
541a43905b Add GeminiTTSService 2025-08-12 14:52:20 -04:00
Aleix Conchillo Flaqué
707df913cd FrameProcessor: fix race condition on FrameProcessorQueue
We need to increment the counters before the await otherwise we could go to a
different task that could add an item with the same counter.

Also, we need to handle non-frame items as well.
2025-08-12 11:48:22 -07:00
Aleix Conchillo Flaqué
3f3d757581 tests: added WatchdogQueue and WatchdogPriorityQueue unit tests 2025-08-12 11:48:22 -07:00
Aleix Conchillo Flaqué
7c781ce816 WatchdogPriorityQueue: make WatchdogPriorityCancelSentinel public 2025-08-12 11:34:31 -07:00
Aleix Conchillo Flaqué
f3efc9da00 WatchdogQueue: make WatchdogQueueCancelSentinel public 2025-08-12 11:34:31 -07:00
Mark Backman
827a70104d Merge pull request #2425 from pipecat-ai/mb/runner-add-exotel
Add Exotel support to the development runner
2025-08-12 10:36:54 -07:00
Mark Backman
a40327305c Add Exotel support to the development runner 2025-08-12 13:21:18 -04:00
Thu Nguyen
168af44429 Fix: assigns string code instead of Language enum to _language attr of BaseWhisperSTTService 2025-08-12 20:27:26 +07:00
Mark Backman
5f8433476c Merge pull request #2397 from gladiaio/PLA-37-GladiaSTTService-minor-tweaks
feat: add minor tweaks to GladiaSTTService
2025-08-12 04:59:40 -07:00
Fabrice Lamant
6a6fea74f5 fix: set default region to none 2025-08-12 13:31:51 +02:00
Mark Backman
91b557ecbf Merge pull request #2419 from pipecat-ai/mb/fix-lockfile-workflow 2025-08-12 03:39:54 -07:00
Mark Backman
be85291414 Merge pull request #2420 from pipecat-ai/mb/runner-handle-sigint-default 2025-08-12 03:39:29 -07:00
Fabrice Lamant
09f171b69d fix: only pass region if set 2025-08-12 12:05:38 +02:00
Aleix Conchillo Flaqué
929fd98958 Merge pull request #2416 from pipecat-ai/aleix/release-evals-vision
scripts(evals): add vision support
2025-08-11 20:08:08 -07:00
Aleix Conchillo Flaqué
1cfbfcaf11 scripts(evals): add vision support 2025-08-11 20:06:24 -07:00
Mark Backman
cd5a3c13bd Development runner: handle_sigint defaults to False 2025-08-11 22:06:56 -04:00
Mark Backman
9b871b0cc5 Update uv.lock, remove lockfile workflow, update CONTRIBUTING with dependency guidance 2025-08-11 21:39:25 -04:00
Mark Backman
0d499a8aa3 Merge pull request #2409 from pipecat-ai/mb/refactor-playht-http
Refactor PlayHTHttpTTSService to use aiohttp
2025-08-11 18:20:58 -07:00
Mark Backman
45292ab13d Merge pull request #2411 from pipecat-ai/mb/fix-websocket-service-retry
fix: WebsocketService retry logic incorrectly handling ConnectionClos…
2025-08-11 18:17:50 -07:00
Mark Backman
be6ea0dbf6 Code review feedback 2025-08-11 21:17:04 -04:00
Aleix Conchillo Flaqué
fb18ae174e Merge pull request #2417 from pipecat-ai/aleix/release-evals-15-series
scripts(evals): add multilingual support and 15 series
2025-08-11 17:14:47 -07:00
Mark Backman
c4506523ab Refactor PlayHTHttpTTSService to use aiohttp 2025-08-11 19:58:25 -04:00
Aleix Conchillo Flaqué
b360cb31dc scripts(evals): add multilingual support and 15 series 2025-08-11 15:21:14 -07:00
Aleix Conchillo Flaqué
07f104199c Merge pull request #2415 from pipecat-ai/aleix/moondream-2025-01-09
MoondreamService: update to revision 2025-01-09
2025-08-11 15:10:35 -07:00
Aleix Conchillo Flaqué
bc1949b4bf MoondreamService: update to revision 2025-01-09 2025-08-11 14:54:04 -07:00
Aleix Conchillo Flaqué
2035dd8b39 Merge pull request #2403 from pipecat-ai/aleix/system-frame-queue-priority-fix
FrameProcessor: fix system frame higher priority and use a PriorityQueue
2025-08-11 13:57:57 -07:00
Aleix Conchillo Flaqué
24c8189327 Merge pull request #2405 from pipecat-ai/aleix/frame-processor-direct-mode
FrameProcessor: introduce direct mode
2025-08-11 13:57:34 -07:00
Mark Backman
998ac32627 Merge pull request #2413 from captaincaius/fix-stt-mute-filter-vad-frames-20250810
Add VADUserStartSpeakingFrame VADUserStopSpeakingFrame to STTMuteFilter (fix #2412)
2025-08-11 13:54:34 -07:00
Aleix Conchillo Flaqué
50645c1c4f README: recommend python 3.11-3.12
Python 3.11 has significant performance improvements compared to 3.10, which
especially benefit Pipecat's heavy use of asyncio.
2025-08-11 13:53:08 -07:00
Aleix Conchillo Flaqué
8ce29ee8f2 FrameProcessor: fix system frame higher priority and use a PriorityQueue 2025-08-11 13:53:08 -07:00
Captain Caius
7b8aeef4cc update changelog 2025-08-11 12:45:54 -07:00
Aleix Conchillo Flaqué
6a24457f0e FrameProcessor: introduce direct mode
Direct mode avoids creating internal queues and tasks and processes frames right
away. This might be useful for some very simple processors.
2025-08-11 09:26:31 -07:00
Aleix Conchillo Flaqué
2c01c2b5b3 Merge pull request #2404 from pipecat-ai/aleix/examples-22-simplify-main-pipeline
examples(foundational): update 22 series with simple main pipelines
2025-08-11 09:14:39 -07:00
Aleix Conchillo Flaqué
1c2e114fa2 examples(foundational): update 22 series with simple main pipelines 2025-08-11 09:13:09 -07:00
Filipi da Silva Fuchter
0f137e36c2 Merge pull request #2399 from pipecat-ai/filipi/heygen_latency
Improving the latency of the `HeyGenVideoService`.
2025-08-11 09:13:10 -03:00
Filipi Fuchter
b7f12a96f1 Improving the latency of the HeyGenVideoService. 2025-08-11 09:11:17 -03:00
Filipi da Silva Fuchter
3331f71e17 Merge pull request #2398 from pipecat-ai/filipi/ttfb_metrics_video_services
Added TTFB metrics for `HeyGenVideoService` and `TavusVideoService`.
2025-08-11 09:09:27 -03:00
Filipi Fuchter
55d200e2d1 Added TTFB metrics for HeyGenVideoService and TavusVideoService. 2025-08-11 09:07:21 -03:00
Captain Caius
3fae00e067 Add VADUserStartSpeakingFrame VADUserStopSpeakingFrame to STTMuteFilter 2025-08-10 19:35:04 -07:00
Mark Backman
78cdefd191 Merge pull request #2410 from smokyabdulrahman/issue-2373
Support endpoint_id for AzureSTTService
2025-08-10 16:43:29 -07:00
Mark Backman
42502a4f3b fix: WebsocketService retry logic incorrectly handling ConnectionClosedOK exception 2025-08-10 19:35:05 -04:00
Abdulrahman Alrahma
fc67cc3302 Support endpoint_id for AzureSTTService 2025-08-10 22:24:47 +01:00
Aleix Conchillo Flaqué
241ab19228 update uv.lock with numba dependency 2025-08-08 15:12:55 -07:00
Mark Backman
c08e8ec8fb Merge pull request #2391 from pipecat-ai/mb/readme-local-dev
Update README with local dev setup for contributors
2025-08-08 11:15:58 -07:00
Mark Backman
eb9bc9644e Merge pull request #2400 from pipecat-ai/mb/pin-numba-0.61.2
fix: pin numba to >=0.61.2
2025-08-08 11:15:22 -07:00
Mark Backman
3a306dae90 fix: pin numba to >=0.61.2 2025-08-08 10:52:47 -04:00
Fabrice Lamant
e503ea7466 feat: add minor tweaks to GladiaSTTService 2025-08-08 10:21:52 +02:00
Mark Backman
c42cc8254f Update README with local dev setup for contributors 2025-08-07 22:07:35 -04:00
Aleix Conchillo Flaqué
a8e21f7d5d Merge pull request #2395 from pipecat-ai/aleix/examples-15-inherit-parallel-pipeline
examples(foundational): move 15/15a logic into its own processor
2025-08-07 17:59:28 -07:00
Aleix Conchillo Flaqué
c6ef8de578 scripts(evals): fix 14v-function-calling-openai.py 2025-08-07 17:57:47 -07:00
Aleix Conchillo Flaqué
fc571fba42 examples(foundational): move 15/15a logic into its own processor 2025-08-07 17:57:47 -07:00
Mark Backman
0502ee2b5a Merge pull request #2394 from pipecat-ai/mb/uv-lock
Update uv.lock
2025-08-07 15:25:38 -07:00
Mark Backman
9ec047094b Update uv.lock 2025-08-07 18:24:47 -04:00
Mark Backman
d991c106c8 Merge pull request #2393 from pipecat-ai/mb/openai-dep
fix: pin openai package upper bound to <=1.99.1
2025-08-07 15:19:05 -07:00
Mark Backman
312fb23c89 fix: pin openai package upper bound to <=1.99.1 2025-08-07 18:00:25 -04:00
Aleix Conchillo Flaqué
4d7f21d44e Merge pull request #2392 from pipecat-ai/aleix/avoid-using-tts-say
deprecate TTSService.say() method
2025-08-07 13:55:49 -07:00
Aleix Conchillo Flaqué
ec25d0a7c9 examples(foundational): fix 20a-persistent-context-openai 2025-08-07 13:48:32 -07:00
Aleix Conchillo Flaqué
2b8218deaa examples(foundational): use TTSSpeakFrame instead of TTSService.say() 2025-08-07 13:48:32 -07:00
Aleix Conchillo Flaqué
11119430cd TTSService: deprecate say() method 2025-08-07 13:48:32 -07:00
kompfner
9ca79232c1 Merge pull request #2380 from pipecat-ai/pk/deprecate-llm-messages-frame
Deprecate `LLMMessagesFrame`, `LLMUserResponseAggregator`, and `LLMAssistantResponseAggregator`
2025-08-07 15:13:01 -04:00
Paul Kompfner
9ea06c33f7 Bump deprecation version of LLMMessagesFrame, LLMUserResponseAggregator, and LLMAssistantResponseAggregator (the deprecation slipped past the 0.0.78 release) 2025-08-07 14:56:50 -04:00
Paul Kompfner
30a1dd202e Move deprecation of LLMMessagesFrame, LLMUserResponseAggregator, and LLMAssistantResponseAggregator into the next release in the changelog 2025-08-07 14:55:11 -04:00
Paul Kompfner
809ab0b7b6 Improve printed deprecation warning 2025-08-07 14:45:35 -04:00
Paul Kompfner
2b5db9c562 Remove redundant deprecation warning in docstring 2025-08-07 14:45:35 -04:00
Paul Kompfner
b4a886b59f Remove redundant deprecation warning in docstring 2025-08-07 14:45:35 -04:00
Paul Kompfner
07eb00722b Fix langchain unit test 2025-08-07 14:45:35 -04:00
Paul Kompfner
96652b8fba Add new deprecations to changelog 2025-08-07 14:45:30 -04:00
Paul Kompfner
df1fcf0c68 Remove unused import 2025-08-07 14:43:37 -04:00
Paul Kompfner
711f740d9e Update UserResponseAggregator to avoid using the now-deprecated LLMUserResponseAggregator 2025-08-07 14:43:37 -04:00
Paul Kompfner
a0bda98c20 Update langchain to avoid using the now-deprecated LLMMessagesFrame, LLMUserResponseAggregator, and LLMAssistantResponseAggregator 2025-08-07 14:43:37 -04:00
Paul Kompfner
1c1bae35ab Mention deprecation in docstring for LLMMessagesFrame 2025-08-07 14:43:37 -04:00
Paul Kompfner
56c52c2cf2 Deprecate LLMUserResponseAggregator and LLMAssistantResponseAggregator, which depend on the now-deprecated LLMMessagesFrame. 2025-08-07 14:43:37 -04:00
Paul Kompfner
740aee1a1a Fix an issue in AnthropicLLMContext where we would never initialize turns_above_cache_threshold if we were upgrading from an OpenAILLMContext.
I noticed this when working on 22c-natural-conversation-mixed-llms.py
2025-08-07 14:43:37 -04:00
Paul Kompfner
f0391c3280 Progress on updating foundational examples to avoid using the newly-deprecated LLMMessagesFrame.
Skipping over 07b-interruptible-langchain.py for now, as it requires deeper changes involving `LLMUserResponseAggregator` and `LLMAssistantResponseAggregator`.
2025-08-07 14:43:37 -04:00
Paul Kompfner
64e48e4660 Deprecate LLMMessagesFrame.
The same functionality can be achieved using either:
- `LLMMessagesUpdateFrame` with the desired messages, with `run_llm` set to `True`
- `OpenAILLMContextFrame` with a new context initialized with the desired messages
2025-08-07 14:43:37 -04:00
Paul Kompfner
b8147bdbbd Add missing Deepgram key to env.example 2025-08-07 14:43:37 -04:00
Aleix Conchillo Flaqué
315e45d41b Merge pull request #2389 from pipecat-ai/aleix/pipecat-0.0.78
update CHANGELOG for 0.0.78
2025-08-07 11:34:27 -07:00
Aleix Conchillo Flaqué
c057139c48 update CHANGELOG for 0.0.78 2025-08-07 11:14:54 -07:00
Mark Backman
c61e07132d Merge pull request #2390 from pipecat-ai/mb/optionally-ignore-emulated-speech
feat: Add option to ignore emulated user speech while the bot is spea…
2025-08-07 11:14:46 -07:00
Mark Backman
a5f5e418a8 feat: Add option to ignore emulated user speech while the bot is speaking 2025-08-07 14:08:11 -04:00
Mark Backman
31acfaa091 Merge pull request #2388 from pipecat-ai/14v-adding-openai-stt-tts-llm-functioncalling
14v adding OpenAI stt tts llm functioncalling
2025-08-07 10:22:35 -07:00
Mark Backman
69541c8835 Linting fix, plus update eval suite with 14v and others, tiny fix for 14m, too 2025-08-07 13:20:45 -04:00
Varun Singh
af94620839 Add OpenAI function calling example with Pipecat
Introduces a new example script demonstrating how to use OpenAI's function calling capabilities within a Pipecat pipeline. The example integrates OpenAI STT, TTS, and LLM services, registers a weather function, and sets up a pipeline for real-time audio interaction over WebRTC.
2025-08-07 13:20:45 -04:00
Filipi da Silva Fuchter
cec8a74293 Merge pull request #2386 from pipecat-ai/filipi/parallel_pipeline
Only push the StartFrame when all parallel pipelines have processed it
2025-08-07 14:20:30 -03:00
Filipi Fuchter
228a55ac1e Only push the StartFrame when all parallel pipelines have processed it. 2025-08-07 14:18:21 -03:00
Vanessa Pyne
ab9831daf0 Merge pull request #2382 from pipecat-ai/vp-trace-ignore-message
log: warning -> trace for elevenlabs tts unavailable context
2025-08-07 09:35:57 -05:00
Vanessa Pyne
e8c3f5dea6 Update src/pipecat/services/elevenlabs/tts.py
Co-authored-by: Mark Backman <mark@daily.co>
2025-08-07 09:23:33 -05:00
Mark Backman
4288b5e780 Merge pull request #2381 from pipecat-ai/aleix/runner-args-pipeline-idle-timeout
allow specifying PipelineTask idle timeout to runner arguments
2025-08-07 04:47:08 -07:00
Mark Backman
23343dd7e7 Remove idle_timeout_secs from quickstart 2025-08-07 07:44:21 -04:00
Mark Backman
88de5dd415 Merge pull request #2383 from pipecat-ai/aleix/riva-stt-iterator-exception
properly handle concurrent.futures.CancelledError
2025-08-07 04:39:56 -07:00
Mark Backman
33f87589d1 Merge pull request #2384 from pipecat-ai/aleix/release-evals-soniox-inworld-asyncai
scripts(evals): added soniox, inworld and asyncai
2025-08-07 04:35:18 -07:00
Aleix Conchillo Flaqué
7ed14ad91f scripts(evals): added soniox, inworld and asyncai 2025-08-06 23:14:50 -07:00
Aleix Conchillo Flaqué
86c6141580 DailyTransport: handle future cancellation 2025-08-06 23:03:20 -07:00
Aleix Conchillo Flaqué
c97643c797 RivaSTTService: always use WatchdogQueue 2025-08-06 23:00:03 -07:00
Aleix Conchillo Flaqué
434d346079 RivaSTTService: handle future cancellation 2025-08-06 22:59:52 -07:00
vipyne
64ae8d2394 log: warning -> trace for elevenlabs tts unavailable context 2025-08-06 22:40:47 -05:00
Aleix Conchillo Flaqué
786f24c9db examples(foundational): use RunnerArgs.pipeline_idle_timeout_secs 2025-08-06 19:38:06 -07:00
Aleix Conchillo Flaqué
38951aab56 scripts(evals): use RunnerArguments.pipeline_idle_timeout_secs 2025-08-06 19:37:29 -07:00
Aleix Conchillo Flaqué
ed8b0655a8 scripts(evals): fix runner eval cancellation
We need to call asyncio.gather() just once, not for every cancelled task.
2025-08-06 19:36:42 -07:00
Aleix Conchillo Flaqué
0b2b9f5f1b RunnerArguments: add pipeline_idle_timeout_secs 2025-08-06 19:35:40 -07:00
Filipi da Silva Fuchter
ad1841b739 Merge pull request #2377 from pipecat-ai/filipi/fast_api_freeze_issue
Fixed an issue in BaseOutputTransport where the loop could consume all CPU.
2025-08-06 14:58:36 -03:00
Mark Backman
b0c002c128 Merge pull request #2378 from pipecat-ai/mb/pyproject-compat-updates
Add new python-compatiblity workflow to check for dependency compatib…
2025-08-06 10:40:29 -07:00
Mark Backman
820176084c Add support for 3.13 by bumping min version for vllm to 0.9.0, adding support for torch and torchaudio up to the next major version 2025-08-06 13:36:01 -04:00
Mark Backman
5b7e31beff README updates for python versions 2025-08-06 13:36:01 -04:00
Mark Backman
41a22d3bf4 Add new python-compatiblity workflow to check for dependency compatibility across supported python versions 2025-08-06 13:36:01 -04:00
Filipi Fuchter
84fecabac5 Removing audio sleep from FastAPI and WebSocket server when they are not connected. 2025-08-06 14:02:51 -03:00
Filipi Fuchter
bbe01d10ef Fixed an issue in BaseOutputTransport where the loop could consume all CPU. 2025-08-06 12:42:58 -03:00
Mark Backman
4364990fd0 Merge pull request #2375 from fabrice404/gladia-region-selection
Gladia region selection
2025-08-06 07:01:24 -07:00
Fabrice Lamant
e576fa481f Add new region feature for GladiaSTTService in CHANGELOG 2025-08-06 15:31:10 +02:00
Mark Backman
ac6b59cae2 Merge pull request #2372 from pipecat-ai/mb/dotenv-dev
Wider package support for python-dotenv dev dep
2025-08-06 06:06:01 -07:00
Mark Backman
12e168e740 Wider package support for python-dotenv dev dep 2025-08-06 09:04:01 -04:00
Mark Backman
ac354f66ed Merge pull request #2371 from pipecat-ai/mb/docs-gen-with-uv
Update docs auto-generation to use uv
2025-08-06 06:02:52 -07:00
Mark Backman
eead793927 Merge pull request #2370 from pipecat-ai/mb/update-workflows-for-uv
Update workflows for uv
2025-08-06 05:54:55 -07:00
Fabrice Lamant
0594a203fc Add new region parameter to Gladia 2025-08-06 14:28:06 +02:00
Mark Backman
2337a2d92d Remove dev-requirements.txt and mentions of it 2025-08-05 21:46:50 -04:00
Mark Backman
b3e2603553 Update workflows for uv 2025-08-05 21:45:48 -04:00
Mark Backman
29229df719 Speed up builds, mocking large packages 2025-08-05 21:34:40 -04:00
Aleix Conchillo Flaqué
61f4dd2ff2 scripts(evals): fix 14e-function-calling-google 2025-08-05 17:44:45 -07:00
Mark Backman
42094fb206 Update docs auto-generation to use uv 2025-08-05 20:37:27 -04:00
Aleix Conchillo Flaqué
58c41f112a DailyRunnerArguments: make body optional (fix) 2025-08-05 16:59:36 -07:00
Aleix Conchillo Flaqué
fa55e2ca9b Merge pull request #2369 from pipecat-ai/aleix/pipeline-task-cancellation-fix
PipelineTask: always try to cancel things
2025-08-05 16:56:23 -07:00
Aleix Conchillo Flaqué
313fdc92a1 DailyRunnerArguments: make body optional 2025-08-05 16:39:18 -07:00
Aleix Conchillo Flaqué
d22d2da03d PipelineTask: always try to cancel things
In a previous commit we only cleaned things up if the user ran
`task.cancel()`. However, if the task finished cleanly we were not cancelling
anything.
2025-08-05 16:24:59 -07:00
Aleix Conchillo Flaqué
de2ae9a2ec Merge pull request #2368 from pipecat-ai/aleix/release-evals-runner-args-fix
pass runner arguments to release evals
2025-08-05 16:23:32 -07:00
Aleix Conchillo Flaqué
52a6d8013c scripts(evals): pass runner arguments to run_bot() 2025-08-05 16:13:32 -07:00
Aleix Conchillo Flaqué
f14cbae9b5 DailyRunnerArguments: make token optional
DailyTransport can get a None token value.
2025-08-05 15:46:12 -07:00
Aleix Conchillo Flaqué
8fe906438a Merge pull request #2358 from pipecat-ai/aleix/system-frames-queued
system frames are now queued
2025-08-05 15:09:52 -07:00
Mark Backman
d8f4db8827 Merge pull request #2367 from richtermb/richtermb/fix-errorframe-docstring
Rename 'source' parameter to 'processor' in ErrorFrame class document…
2025-08-05 15:09:18 -07:00
Aleix Conchillo Flaqué
a5ea6e1642 FrameProcessor: system frames are now queued
System frames are now queued. Before, system frames could be generated from any
task and would not guarantee any order which was causing undesired
behavior. Also, it was possible to get into some rare recursion issues because
of the way system frames were executed (they were executed in-place, meaning
calling `push_frame()` would finish after the system frame traversed all the
pipeline). This makes system frames more deterministic.
2025-08-05 15:05:50 -07:00
richtermb
e777e78510 Rename 'source' parameter to 'processor' in ErrorFrame class documentation for clarity. 2025-08-05 15:02:00 -07:00
Aleix Conchillo Flaqué
49a5a1e375 PipelineTask: improve task cancellation 2025-08-05 14:49:23 -07:00
Aleix Conchillo Flaqué
61cb45d61b PipelineTask: also wait on CancelFrame
Before CancelFrames didn't need to be waited for because system frames were
processed in-place and therefore calling push_frame() would finalize after it
traversed all the pipeline. Now, system frames are queued so we need to wait
until CancelFrame reaches the end of the pipeline.
2025-08-05 14:49:23 -07:00
Aleix Conchillo Flaqué
6c6deb4e85 Merge pull request #2366 from pipecat-ai/aleix/run-bot-runner-arguments
add sigint/sigterm to RunnerArguments
2025-08-05 14:46:19 -07:00
Aleix Conchillo Flaqué
66ad29b2b1 example: pass RunnerArguments to run_bot()
This lets us get handle_sigint from RunnerArguments which knows where the
application is running and if SIGINT/SIGTERM should be handled or not.
2025-08-05 14:38:55 -07:00
Aleix Conchillo Flaqué
21e4f0d56d PipelineRunner: argument ordering 2025-08-05 14:38:55 -07:00
Aleix Conchillo Flaqué
627b44bac2 runner: use new RunnerArguments handle_sigint/handle_sigterm
This allows us to control application behavior from the runner arguments, which
depend on the environment they run in.
2025-08-05 14:38:55 -07:00
Aleix Conchillo Flaqué
e2a576beca RunnerArguments: add handle_sigint/handle_sigterm 2025-08-05 14:32:28 -07:00
Mark Backman
2981afb117 Merge pull request #2361 from pipecat-ai/mb/fix-changelog-simli
Fix Simli changelog entry placement
2025-08-05 14:12:38 -07:00
Mark Backman
d422c57b52 Merge pull request #2304 from pipecat-ai/mb/cartesia-cjk-lang-support
CartesiaTTSService: Add CJK lang support for word timestamps
2025-08-05 14:08:53 -07:00
Mark Backman
06d8bbd154 Fix Simli changelog entry placement 2025-08-05 17:07:58 -04:00
Mark Backman
35108afeb8 Merge pull request #2360 from pipecat-ai/mb/add-heygen-readme
Add HeyGen to the README page
2025-08-05 14:05:33 -07:00
Mark Backman
a0e2a2754a Merge pull request #2327 from richtermb/richtermb/push-more-error-frames
Add source parameter to ErrorFrame and set it in FrameProcessor. Upda…
2025-08-05 14:04:52 -07:00
Mark Backman
b8d620c8bb Merge pull request #2362 from pipecat-ai/mb/aws-stt-languages
AWSTranscribeSTTService add support for new languages
2025-08-05 14:00:50 -07:00
Mark Backman
f26bbe4092 Merge pull request #2363 from pipecat-ai/mb/update-14p
Update 14p, add 14p to evals, add Google creds to env.example
2025-08-05 14:00:13 -07:00
Mark Backman
52cb23f8d5 Merge pull request #2364 from pipecat-ai/mb/11labs-default-model
ElevenLabs TTS services: revert to Turbo v2.5 as default model
2025-08-05 13:59:59 -07:00
Filipi da Silva Fuchter
17e7f8a2cd Merge pull request #2352 from pipecat-ai/filipi/webrtc_audio_frame
Implementing if the bot it is speaking or not based on the SpeechOutputAudioRawFrame
2025-08-05 17:26:44 -03:00
richtermb
efddc4732c Refactor ErrorFrame: rename source field to processor for clarity and update related references in FrameProcessor. 2025-08-05 13:25:08 -07:00
richtermb
4476a76ad7 Merge branch 'main' into richtermb/push-more-error-frames 2025-08-05 13:23:24 -07:00
Filipi Fuchter
64592b274b Fixed an issue where BotStartedSpeakingFrame and BotStoppedSpeakingFrame
were not emitted when using `TavusVideoService` or `HeyGenVideoService`.
2025-08-05 17:11:34 -03:00
Aleix Conchillo Flaqué
95c661bdaa Merge pull request #2365 from pipecat-ai/aleix/update-release-evals-for-new-runner
scripts(evals): update to use new runner function
2025-08-05 13:07:57 -07:00
Aleix Conchillo Flaqué
5546c8e01c scripts(evals): update to use new runner function 2025-08-05 11:46:28 -07:00
Mark Backman
14e02c1b08 ElevenLabs TTS services: revert to Turbo v2.5 as default model 2025-08-05 13:44:37 -04:00
Mark Backman
ba5a5c7187 Update 14p, add 14p to evals, add Google creds to env.example 2025-08-05 13:30:36 -04:00
Mark Backman
2378cba155 AWSTranscribeSTTService add support for new languages 2025-08-05 13:01:06 -04:00
Mark Backman
1138c92a00 Merge pull request #2217 from simliai/main
feat: Add Simli Trinity models support to pipecat
2025-08-05 09:01:20 -07:00
Antonyesk601
fb82dc8308 Update CHANGELOG.md
Co-authored-by: Mark Backman <m.backman@gmail.com>
2025-08-05 17:46:01 +02:00
Mark Backman
c8a15f30fa Add HeyGen to the README page 2025-08-05 10:54:49 -04:00
antonyesk601
72168070f1 update changelog 2025-08-05 14:18:41 +00:00
Mark Backman
50083d1144 Merge pull request #2342 from pipecat-ai/mb/runner-connect-request-body
Development runner handles body information in the RTVI connect request
2025-08-05 05:15:55 -07:00
Mark Backman
64732518c6 Development runner handles body information in the RTVI connect request 2025-08-05 07:26:34 -04:00
Mark Backman
c3d8ea210f CartesiaTTSService: Add CJK lang support for word timestamps 2025-08-05 07:17:40 -04:00
Filipi da Silva Fuchter
98ed614f63 Merge pull request #2357 from pipecat-ai/filipi/latency_observer
Added detailed latency logging to UserBotLatencyLogObserver.
2025-08-05 08:11:48 -03:00
Filipi Fuchter
e43bdff31e Added detailed latency logging to UserBotLatencyLogObserver. 2025-08-04 19:36:30 -03:00
Mark Backman
42e48381fe Merge pull request #2355 from pipecat-ai/mb/update-readme-for-uv
Update the README with uv-centric steps
2025-08-04 15:28:07 -07:00
Mark Backman
df7ba64b4a Merge pull request #2354 from pipecat-ai/mb/revert-43-inline-script
Remove inline script from foundational 43a
2025-08-04 15:27:28 -07:00
Mark Backman
ac9b2e67a7 Merge pull request #2349 from pipecat-ai/mb/runner-support-daily-url-arg
daily runner util: remove arg parsing, add auto room, token generation
2025-08-04 13:44:25 -07:00
Mark Backman
c9918607cf Merge pull request #2335 from pipecat-ai/mb/quickstart-runner-improvements
Improve quickstart logging, runner startup message
2025-08-04 13:43:42 -07:00
Mark Backman
cfda410a43 Remove foundational requirements.txt file 2025-08-04 16:38:37 -04:00
Mark Backman
c773ddf83d Update foundational examples README 2025-08-04 16:26:11 -04:00
Mark Backman
54d5ebbc20 Update the README with uv-centric steps 2025-08-04 16:11:38 -04:00
Mark Backman
35002cd727 Remove inline script from foundational 43a 2025-08-04 15:46:18 -04:00
Mark Backman
53d75faa47 Merge pull request #2330 from pipecat-ai/mb/runner-clean-proxy-name
Runner: strip protocol from proxy address
2025-08-04 10:42:16 -07:00
Mark Backman
2901dddc2b Merge pull request #2338 from pipecat-ai/mb/update-release-evals-tavus
Add Tavus, HeyGen, Simli to release-evals
2025-08-04 10:38:27 -07:00
Mark Backman
3a8d809837 Runner: strip protocol from proxy address 2025-08-04 13:38:02 -04:00
Mark Backman
1b3c2bee30 Merge pull request #2331 from pipecat-ai/mb/more-foundational
Updating more foundational examples
2025-08-04 10:37:15 -07:00
Mark Backman
69f049cb63 Merge pull request #2328 from pipecat-ai/mb/04b-example-cleanup
Align 04b livekit example with other foundational examples
2025-08-04 10:36:57 -07:00
Mark Backman
f609971637 daily runner util: remove arg parsing, add auto room, token generation 2025-08-03 21:50:44 -04:00
Mark Backman
b9a2a9b729 Add Tavus, HeyGen, Simli to release-evals 2025-08-02 09:35:06 -04:00
Mark Backman
55731df999 Improve quickstart logging, runner startup message 2025-08-02 08:40:05 -04:00
Yohan Liyanage
248206e234 Fixes 2277 - SSML reserved characters in LLM generated text causes Azure TTS to fail. 2025-08-02 12:49:29 +05:30
Mark Backman
cc9950e72d Updating more foundational examples 2025-08-01 19:58:40 -04:00
richtermb
6814c390ba Update CHANGELOG to reflect the addition of the source field in ErrorFrame for improved error tracking. 2025-08-01 14:47:57 -07:00
Richter Brzeski
c2d05ad23b Merge branch 'pipecat-ai:main' into richtermb/push-more-error-frames 2025-08-01 14:47:08 -07:00
richtermb
91568eeddc Update type hint for source in ErrorFrame to use forward declaration for improved clarity. 2025-08-01 12:52:56 -07:00
richtermb
165d6b4c1d Update CHANGELOG to include new source field in ErrorFrame for error tracking. 2025-08-01 12:25:29 -07:00
Mark Backman
519da9cc61 Align 04b livekit example with other foundational examples 2025-08-01 14:28:15 -04:00
richtermb
ead4e97ab5 Add source parameter to ErrorFrame and set it in FrameProcessor. Updated error handling in AnthropicLLMService and DeepgramSTTService to include ErrorFrame with source information. 2025-08-01 11:14:50 -07:00
antonyesk601
1cbf7ae480 fix: remove unused variable; fix: remove redundant logic 2025-07-23 08:26:44 +00:00
antonyesk601
688031efd6 fix: use undeclared variable _preinitialized. fix: double send of start frame 2025-07-18 08:23:04 +00:00
antonyesk601
0f9e69d3c7 feat: Add Simli Trinity models support to pipecat 2025-07-17 11:55:40 +00:00
357 changed files with 16811 additions and 5222 deletions

View File

@@ -21,24 +21,20 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python
id: setup_python
uses: actions/setup-python@v4
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
python-version: '3.10'
- name: Setup virtual environment
run: |
python -m venv .venv
- name: Install basic Python dependencies
run: |
source .venv/bin/activate
python -m pip install --upgrade pip
pip install -r dev-requirements.txt
version: "latest"
- name: Set up Python
run: uv python install 3.10
- name: Install development dependencies
run: uv sync --group dev
- name: Build project
run: |
source .venv/bin/activate
python -m build
- name: Install project and other Python dependencies
run: |
source .venv/bin/activate
pip install --editable .
run: uv build
- name: Install project in editable mode
run: uv pip install --editable .

View File

@@ -18,35 +18,28 @@ jobs:
steps:
- name: Checkout repo
uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: "latest"
- name: Set up Python
id: setup_python
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Cache virtual environment
uses: actions/cache@v3
with:
# We are hashing dev-requirements.txt and test-requirements.txt which
# contain all dependencies needed to run the tests.
key: venv-${{ runner.os }}-${{ steps.setup_python.outputs.python-version}}-${{ hashFiles('dev-requirements.txt') }}-${{ hashFiles('test-requirements.txt') }}
path: .venv
run: uv python install 3.12
- name: Install system packages
id: install_system_packages
run: |
sudo apt-get install -y portaudio19-dev
- name: Setup virtual environment
- name: Install dependencies
run: |
python -m venv .venv
- name: Install basic Python dependencies
run: |
source .venv/bin/activate
python -m pip install --upgrade pip
pip install -r dev-requirements.txt -r test-requirements.txt
uv sync --group dev --extra anthropic --extra aws --extra google --extra langchain
- name: Run tests with coverage
run: |
source .venv/bin/activate
coverage run
coverage xml
uv run coverage run
uv run coverage xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5
with:

View File

@@ -22,25 +22,22 @@ jobs:
steps:
- name: Checkout repo
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
python-version: "3.10"
- name: Setup virtual environment
run: |
python -m venv .venv
- name: Install development Python dependencies
run: |
source .venv/bin/activate
python -m pip install --upgrade pip
pip install -r dev-requirements.txt
version: "latest"
- name: Set up Python
run: uv python install 3.10
- name: Install development dependencies
run: uv sync --group dev
- name: Ruff formatter
id: ruff-format
run: |
source .venv/bin/activate
ruff format --diff
run: uv run ruff format --diff
- name: Ruff linter (all rules)
id: ruff-check
run: |
source .venv/bin/activate
ruff check
run: uv run ruff check

View File

@@ -17,23 +17,17 @@ jobs:
uses: actions/checkout@v4
with:
ref: ${{ github.event.inputs.gitref }}
- name: Set up Python
id: setup_python
uses: actions/setup-python@v4
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
python-version: '3.10'
- name: Setup virtual environment
run: |
python -m venv .venv
- name: Install basic Python dependencies
run: |
source .venv/bin/activate
python -m pip install --upgrade pip
pip install -r dev-requirements.txt
version: "latest"
- name: Set up Python
run: uv python install 3.10
- name: Install development dependencies
run: uv sync --group dev
- name: Build project
run: |
source .venv/bin/activate
python -m build
run: uv build
- name: Upload wheels
uses: actions/upload-artifact@v4
with:

View File

@@ -12,23 +12,16 @@ jobs:
with:
fetch-tags: true
fetch-depth: 100
- name: Set up Python
id: setup_python
uses: actions/setup-python@v4
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
python-version: '3.10'
- name: Setup virtual environment
run: |
python -m venv .venv
- name: Install basic Python dependencies
run: |
source .venv/bin/activate
python -m pip install --upgrade pip
pip install -r dev-requirements.txt
version: "latest"
- name: Set up Python
run: uv python install 3.10
- name: Install development dependencies
run: uv sync --group dev
- name: Build project
run: |
source .venv/bin/activate
python -m build
run: uv build
- name: Upload wheels
uses: actions/upload-artifact@v4
with:
@@ -38,7 +31,7 @@ jobs:
publish-to-test-pypi:
name: "Publish to Test PyPI"
runs-on: ubuntu-latest
needs: [ build ]
needs: [build]
environment:
name: testpypi
url: https://pypi.org/p/pipecat-ai

View File

@@ -0,0 +1,61 @@
name: Python Compatibility Test
on:
push:
branches: [main, develop]
paths: ['pyproject.toml']
pull_request:
branches: [main, develop]
paths: ['pyproject.toml']
jobs:
test-compatibility:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ['3.10.18', '3.11.13', '3.12.11', '3.13.5']
name: Python ${{ matrix.python-version }}
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y \
portaudio19-dev \
libcairo2-dev \
libgirepository1.0-dev \
pkg-config
- name: Install uv
uses: astral-sh/setup-uv@v4
with:
version: 'latest'
- name: Set up Python ${{ matrix.python-version }}
run: |
uv python install ${{ matrix.python-version }}
uv python pin ${{ matrix.python-version }}
- name: Test uv sync with all extras (Python < 3.13)
if: "!startsWith(matrix.python-version, '3.13.')"
run: |
uv sync --group dev --all-extras --no-extra krisp
- name: Test uv sync without PyTorch extras (Python 3.13+)
if: startsWith(matrix.python-version, '3.13.')
run: |
uv sync --group dev --all-extras \
--no-extra krisp \
--no-extra ultravox \
--no-extra local-smart-turn \
--no-extra moondream \
--no-extra mlx-whisper
- name: Verify installation
run: |
uv run python --version
uv run python -c "import pipecat; print('✅ Pipecat imports successfully')"

View File

@@ -23,17 +23,12 @@ jobs:
token: ${{ secrets.QUICKSTART_SYNC_TOKEN }}
path: quickstart-repo
- name: Sync files (excluding READMEs)
- name: Sync files (excluding uv.lock and README.md)
run: |
# Copy code files only, skip READMEs
cp examples/quickstart/bot.py quickstart-repo/
cp examples/quickstart/requirements.txt quickstart-repo/
cp examples/quickstart/env.example quickstart-repo/
# Copy any other files that aren't README.md
# Copy all files except uv.lock and README.md
find examples/quickstart -type f \
-not -name "README.md" \
-not -name "*.md" \
-not -name "uv.lock" \
-exec cp {} quickstart-repo/ \;
- name: Commit and push changes

View File

@@ -22,31 +22,23 @@ jobs:
steps:
- name: Checkout repo
uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: "latest"
- name: Set up Python
id: setup_python
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Cache virtual environment
uses: actions/cache@v3
with:
# We are hashing dev-requirements.txt and test-requirements.txt which
# contain all dependencies needed to run the tests.
key: venv-${{ runner.os }}-${{ steps.setup_python.outputs.python-version}}-${{ hashFiles('dev-requirements.txt') }}-${{ hashFiles('test-requirements.txt') }}
path: .venv
run: uv python install 3.12
- name: Install system packages
id: install_system_packages
run: |
sudo apt-get install -y portaudio19-dev
- name: Setup virtual environment
- name: Install dependencies
run: |
python -m venv .venv
- name: Install basic Python dependencies
run: |
source .venv/bin/activate
python -m pip install --upgrade pip
pip install -r dev-requirements.txt -r test-requirements.txt
uv sync --group dev --extra anthropic --extra aws --extra google --extra langchain
- name: Test with pytest
run: |
source .venv/bin/activate
pytest
uv run pytest

View File

@@ -1,42 +0,0 @@
name: Update lockfile
on:
push:
paths:
- 'pyproject.toml'
branches:
- main
workflow_dispatch: # Allows manual triggering from GitHub UI
jobs:
update-lockfile:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
# This gives the workflow permission to push back to the repo
token: ${{ secrets.GITHUB_TOKEN }}
- name: Install uv
uses: astral-sh/setup-uv@v1
- name: Update lockfile
run: uv lock
- name: Check for changes
id: verify-changed-files
run: |
if [ -n "$(git status --porcelain)" ]; then
echo "changed=true" >> $GITHUB_OUTPUT
else
echo "changed=false" >> $GITHUB_OUTPUT
fi
- name: Commit lockfile
if: steps.verify-changed-files.outputs.changed == 'true'
run: |
git config --local user.email "action@github.com"
git config --local user.name "GitHub Action"
git add uv.lock
git commit -m "chore: update uv.lock after dependency changes"
git push

View File

@@ -9,22 +9,14 @@ build:
- python3-dev
- libasound2-dev
jobs:
pre_build:
- python -m pip install --upgrade pip
- pip install wheel setuptools
post_build:
- echo "Build completed"
post_install:
- pip install uv
- UV_PROJECT_ENVIRONMENT=$READTHEDOCS_VIRTUALENV_PATH uv sync --group docs --all-extras --no-extra krisp --no-extra gstreamer --no-extra ultravox --no-extra local_smart_turn --no-extra moondream --no-extra riva --no-extra mlx-whisper
sphinx:
configuration: docs/api/conf.py
fail_on_warning: false
python:
install:
- requirements: docs/api/requirements.txt
- method: pip
path: .
search:
ranking:
api/*: 5

View File

@@ -7,15 +7,569 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
## Added
- Added `pipecat.extensions.ivr` for automated IVR system navigation with
configurable goals and conversation handling. Supports DTMF input, verbal
responses, and intelligent menu traversal.
Basic usage:
```python
from pipecat.extensions.ivr.ivr_navigator import IVRNavigator
# Create IVR navigator with your goal
ivr_navigator = IVRNavigator(
llm=llm_service,
ivr_prompt="Navigate to billing department to dispute a charge"
)
# Handle different outcomes
@ivr_navigator.event_handler("on_conversation_detected")
async def on_conversation(processor, conversation_history):
# Switch to normal conversation mode
pass
@ivr_navigator.event_handler("on_ivr_status_changed")
async def on_ivr_status(processor, status):
if status == IVRStatus.COMPLETED:
# End pipeline, transfer call, or start bot conversation
elif status == IVRStatus.STUCK:
# Handle navigation failure
```
- `BaseOutputTransport` now implements `write_dtmf()` by loading DTMF audio and
sending it through the transport. This makes sending DTMF generic across all
output transports.
- Added new config parameters to `GladiaSTTService`.
- PreProcessingConfig > `audio_enhancer` to enhance audio quality.
- CustomVocabularyItem > `pronunciations` and `language` to specify special pronunciations and in which language it will be pronounced.
## Changed
- `pipecat.frames.frames.KeypadEntry` is deprecated and has been moved to
`pipecat.audio.dtmf.types.KeypadEntry`.
## Removed
- Remove `StopInterruptionFrame`. This was a legacy frame that was not really
being used anywhere and it didn't provide any useful meaning. It was only
pushed after `UserStoppedSpeakingFrame`, so developers can just use
`UserStoppedSpeakingFrame`.
- `DailyTransport.write_dtmf()` has been removed in favor of the generic
`BaseOutputTransport.write_dtmf()`.
- Remove deprecated `DailyTransport.send_dtmf()`.
## Deprecated
- `pipecat.frames.frames.KeypadEntry` is deprecated use
`pipecat.audio.dtmf.types.KeypadEntry` instead.
## Fixed
- Fixed an issue where `PipelineTask` was not cleaning up the observers.
## [0.0.82] - 2025-08-28
### Added
- Added a new `LLMRunFrame` to trigger an LLM response:
```python
await task.queue_frames([LLMRunFrame()])
```
This replaces `OpenAILLMContextFrame`, which you'd previously typically use
like this:
```python
await task.queue_frames([context_aggregator.user().get_context_frame()])
```
Use this way of kicking off your conversation when you've already initialized
your context and are simply instructing the bot when to go:
```python
context = OpenAILLMContext(messages, tools)
context_aggregator = llm.create_context_aggregator(context)
# ...
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
# Kick off the conversation.
await task.queue_frames([LLMRunFrame()])
```
Note that if you want to add new messages when kicking off the conversation,
you could use `LLMMessagesAppendFrame` with `run_llm=True` instead:
```python
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
# Kick off the conversation.
await task.queue_frames([LLMMessagesAppendFrame(new_messages, run_llm=True)])
```
In the rare case you don't have a context aggregator in your pipeline, then
you may continue using a context frame.
- Added support for switching between audio+text to text-only modes within the
same pipeline. This is done by pushing
`LLMConfigureOutputFrame(skip_tts=True)` to enter text-only mode, and
disabling it to return to audio+text. The LLM will still generate tokens and
add them to the context, but they will not be sent to TTS.
- Added `skip_tts` field to `TextFrame`. This lets a text frame bypass TTS while
still being included in the LLM context. Useful for cases like structured text
that isn't meant to be spoken but should still contribute to context.
- Added a `cancel_timeout_secs` argument to `PipelineTask` which defines how
long the pipeline has to complete cancellation. When `PipelineTask.cancel()`
is called, a `CancelFrame` is pushed through the pipeline and must reach the
end. If it does not reach the end within the specified time, a warning is
shown and the wait is aborted.
- Added a new "universal" (LLM-agnostic) `LLMContext` and accompanying
`LLMContextAggregatorPair`, which will eventually replace `OpenAILLMContext`
(and the other under-the-hood contexts) and the other context aggregators.
The new universal `LLMContext` machinery allows a single context to be shared
between different LLMs, enabling runtime LLM switching and scenarios like
failover.
From the developer's point of view, switching to using the new universal
context machinery will usually be a matter of going from this:
```python
context = OpenAILLMContext(messages, tools)
context_aggregator = llm.create_context_aggregator(context)
```
To this:
```python
context = LLMContext(messages, tools)
context_aggregator = LLMContextAggregatorPair(context)
```
To start, the universal `LLMContext` is supported with the following LLM
services:
- `OpenAILLMService`
- `GoogleLLMService`
- Added a new `LLMSwitcher` class to enable runtime LLM switching, built atop a
new generic `ServiceSwitcher`.
Switchers take a switching strategy. The first available strategy is
`ServiceSwitcherStrategyManual`.
To switch LLMs at runtime, the LLMs must be sharing one instance of the new
universal `LLMContext` (see above bullet).
```python
# Instantiate your LLM services
llm_openai = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
llm_google = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"))
# Instantiate a switcher
# (ServiceSwitcherStrategyManual defaults to OpenAI, as it's first in the list)
llm_switcher = LLMSwitcher(
llms=[llm_openai, llm_google], strategy_type=ServiceSwitcherStrategyManual
)
# Create your pipeline
pipeline = Pipeline(
[
transport.input(),
stt,
context_aggregator.user(),
llm_switcher,
tts,
transport.output(),
context_aggregator.assistant(),
]
)
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
# ...
# Whenever is appropriate, switch LLMs!
await task.queue_frames([ManuallySwitchServiceFrame(service=llm_google)])
```
- Added an `LLMService.run_inference()` method to LLM services to enable
direct, out-of-band (i.e. out-of-pipeline) inference.
### Changed
- Updated `daily-python` to 0.19.8.
- `PipelineTask` now waits for `StartFrame` to reach the end of the pipeline
before pushing any other frames.
- Updated `CartesiaTTSService` and `CartesiaHttpTTSService` to align with
Cartesia's changes for the `speed` parameter. It now takes only an enum of
`slow`, `normal`, or `fast`.
- Added support to `AWSBedrockLLMService` for setting authentication
credentials through environment variables.
- Updated `SarvamTTSService` to use WebSocket streaming for real-time audio
generation with multiple Indian languages, with HTTP support still available
via `SarvamHttpTTSService`.
### Fixed
- Fixed an RTVI issue that was causing frames to be pushed before pipeline was
properly initialized.
- Fixed some `get_messages_for_logging()` that were returning a JSON string
instead of a list.
- Fixed a `DailyTransport` issue that prevented DTMF tones from being sent.
- Fixed a missing import in `SentryMetrics`.
- Fixed `AWSPollyTTSService` to support AWS credential provider chain (IAM
roles, IRSA, instance profiles) instead of requiring explicit environment
variables.
- Fixed a `CartesiaTTSService` issue that was causing the application to hang
after Cartesia's 5 minutes timed out.
- Fixed an issue preventing `SpeechmaticsSTTService` from transcribing audio.
## [0.0.81] - 2025-08-25
### Added
- Added `pipecat.extensions.voicemail`, a module for detecting voicemail vs.
live conversation, primarily intended for use in outbound calling scenarios.
The voicemail module is optimized for text LLMs only.
- Added new frames to the `idle_timeout_frames` arg: `TranscriptionFrame`,
`InterimTranscriptionFrame`, `UserStartedSpeakingFrame`, and
`UserStoppedSpeakingFrame`. These additions serve as indicators of user
activity in the pipeline idle detection logic.
- Allow passing custom pipeline sink and source processors to a
`Pipeline`. Pipeline source and sink processors are used to know and control
what's coming in and out of a `Pipeline` processor.
- Added `FrameProcessor.pause_processing_system_frames()` and
`FrameProcessor.resume_processing_system_frames()`. These allow pausing and
resuming the processing of system frames.
- Added new `on_process_frame()` observer method which makes it possible to know
when a frame is being processed.
- Added new `FrameProcessor.entry_processor()` method. This allows you to access
the first non-compound processor in a pipeline.
- Added `FrameProcessor` properties `processors`, `next` and `previous`.
- `ElevenLabsTTSService` now supports additional runtime changes to the `model`,
`language`, and `voice_settings` parameters.
- Added `apply_text_normalization` support to `ElevenLabsTTSService` and
`ElevenLabsHttpTTSService`.
- Added `MistralLLMService`, using Mistral's chat completion API.
- Added the ability to retry executing a chat completion after a timeout period
for `OpenAILLMService` and its subclasses, `AnthropicLLMService`, and
`AWSBedrockLLMService`. The LLM services accept new args:
`retry_timeout_secs` and `retry_on_timeout`. This feature is disabled by
default.
### Changed
- Updated `daily-python` to 0.19.7.
### Deprecated
- `FrameProcessor.wait_for_task()` is deprecated. Use `await task` or
`await asyncio.wait_for(task, timeout)` instead.
### Removed
- Watchdog timers have been removed. They were introduced in 0.0.72 to help
diagnose pipeline freezes. Unfortunately, they proved ineffective since they
required developers to use Pipecat-specific queues, iterators, and events to
correctly reset the timer, which limited their usefulness and added friction.
- Removed unused `FrameProcessor.set_parent()` and
`FrameProcessor.get_parent()`.
### Fixed
- Fixed an issue that would cause `PipelineRunner` and `PipelineTask` to not
handle external asyncio task cancellation properly.
- Added `SpeechmaticsSTTService` exception handling on connection and sending.
- Replaced `asyncio.wait_for()` with `wait_for2.wait_for()` for Python <
3.12 because of issues regarding task cancellation (i.e. cancellation is
never propagated).
See https://bugs.python.org/issue42130
- Fixed an `AudioBufferProcessor` issue that would cause audio overlap when
setting a max buffer size.
- Fixed an issue where `AsyncAITTSService` had very high latency in responding
by adding `force=true` when sending the flush command.
### Performance
- Improve `PipelineTask` performance by using direct mode processors and by
removing unnecessary tasks.
- Improve `ParallelPipeline` performance by using direct mode, by not
creating a task for each frame and every sub-pipeline and also by removing
other unnecessary tasks.
- `Pipeline` performance improvements by using direct mode.
### Other
- Added `14w-function-calling-mistal.py` using `MistralLLMService`.
- Added `13j-azure-transcription.py` using `AzureSTTService`.
## [0.0.80] - 2025-08-13
### Added
- Added `GeminiTTSService` which uses Google Gemini to generate TTS output. The
Gemini model can be prompted to insert styled speech to control the TTS
output.
- Added Exotel support to Pipecat's development runner. You can now connect
using the runner with `uv run bot.py -t exotel` and an ngrok connection to
HTTP port 7860.
- Added `enable_direct_mode` argument to `FrameProcessor`. The direct mode is
for processors which require very little I/O or compute resources, that is
processors that can perform their task almost immediately. These types of
processors don't need any of the internal tasks and queues usually created by
frame processors which means overall application performance might be slightly
increased. Use with care.
- Added TTFB metrics for `HeyGenVideoService` and `TavusVideoService`.
- Added `endpoint_id` parameter to `AzureSTTService`. ([Custom EndpointId](https://docs.azure.cn/en-us/ai-services/speech-service/how-to-recognize-speech?pivots=programming-language-python#use-a-custom-endpoint))
### Changed
- `WatchdogPriorityQueue` now requires the items to be inserted to always be
tuples and the size of the tuple needs to be specified in the constructor when
creating the queue with the `tuple_size` argument.
- Updated Moondream to revision `2025-01-09`.
- Updated `PlayHTHttpTTSService` to no longer use the `pyht` client to remove
compatibility issues with other packages. Now you can use the PlayHT HTTP
service with other services, like GoogleLLMService.
- Updated `pyproject.toml` to once again pin `numba` to `>=0.61.2` in order to
resolve package versioning issues.
- Updated the `STTMuteFilter` to include `VADUserStartedSpeakingFrame` and
`VADUserStoppedSpeakingFrame` in the list of frames to filter when the
filtering is on.
### Performance
- Improving the latency of the `HeyGenVideoService`.
- Improved some frame processors performance by using the new frame processor
direct mode. In direct mode a frame processor will process frames right away
avoiding the need for internal queues and tasks. This is useful for some
simple processors. For example, in processors that wrap other processors
(e.g. `Pipeline`, `ParallelPipeline`), we add one processor before and one
after the wrapped processors (internally, you will see them as sources and
sinks). These sources and sinks don't do any special processing and they
basically forward frames. So, for these simple processors we now enable the
new direct mode which avoids creating any internal tasks (and queues) and
therefore improves performance.
### Fixed
- Fixed an issue with the `BaseWhisperSTTService` where the language was
specified as an enum and not a string.
- Fixed an issue where `SmallWebRTCTransport` ended before TTS finished.
- Fixed an issue in `OpenAIRealtimeBetaLLMService` where specifying a `text`
`modalities` didn't result in text being outputted from the model.
- Added SSML reserved character escaping to `AzureBaseTTSService` to properly
handle special characters in text sent to Azure TTS. This fixes an issue
where characters like `&`, `<`, `>`, `"`, and `'` in LLM-generated text would
cause TTS failures.
- Fixed a `WatchdogPriorityQueue` issue that could cause an exception when
comparing watchdog cancel sentinel items with other items in the queue.
- Fixed an issue that would cause system frames to not be processed with higher
priority than other frames. This could cause slower interruption times.
- Fixed an issue where retrying a websocket connection error would result in an
error.
### Other
- Add foundation example `19b-openai-realtime-beta-text.py`, showing how to use
`OpenAIRealtimeBetaLLMService` to output text to a TTS service.
- Add vision support to release evals so we can run the foundational examples 12
series.
- Added foundational example `15a-switch-languages.py` to release evals. It is
able to detect if we switched the language properly.
- Updated foundational examples to show how to enclose complex logic
(e.g. `ParallelPipeline`) into a single processor so the main pipeline becomes
simpler.
- Added `07n-interruptible-gemini.py`, demonstrating how to use
`GeminiTTSService`.
## [0.0.79] - 2025-08-07
### Changed
- Changed `pipecat-ai`'s `openai` dependency to `>=1.74.0,<=1.99.1` due to a
breaking change in `openai` 1.99.2 ([commit](https://github.com/openai/openai-python/commit/657f551dbe583ffb259d987dafae12c6211fba06))
### Deprecated
- `TTSService.say()` is deprecated, push a `TTSSpeakFrame` instead. Calling
functions directly is a discouraged pattern in Pipecat because, for example,
it might cause issues with frame ordering.
- `LLMMessagesFrame` is deprecated, in favor of either:
- `LLMMessagesUpdateFrame` with `run_llm=True`
- `OpenAILLMContextFrame` with desired messages in a new context
- `LLMUserResponseAggregator` and `LLMAssistantResponseAggregator` are
deprecated, as they depended on the now-deprecated `LLMMessagesFrame`. Use
`LLMUserContextAggregator` and `LLMAssistantContextAggregator` (or
LLM-specific subclasses thereof) instead.
## [0.0.78] - 2025-08-07
### Added
- Added `enable_emulated_vad_interruptions` to `LLMUserAggregatorParams`.
When user speech is emulated (e.g. when a transcription is received but
VAD doesn't detect speech), this parameter controls whether the emulated
speech can interrupt the bot. Default is False (emulated speech is ignored
while the bot is speaking).
- Added new `handle_sigint` and `handle_sigterm` to `RunnerArguments`. This
allows applications to know what settings they should use for the environment
they are running on. Also, added `pipeline_idle_timeout_secs` to be able to
control the `PipelineTask` idle timeout.
- Added `processor` field to `ErrorFrame` to indicate `FrameProcessor` that
generated the error.
- Added new language support for `AWSTranscribeSTTService`. All languages
supporting streaming data input are now supported:
https://docs.aws.amazon.com/transcribe/latest/dg/supported-languages.html
- Added support for Simli Trinity Avatars. A new `is_trinity_avatar` parameter
has been introduced to specify whether the provided `faceId` corresponds to a
Trinity avatar, which is required for optimal Trinity avatar performance.
- The development runner now handles custom `body` data for `DailyTransport`.
The `body` data is passed to the Pipecat client. You can POST to the `/start`
endpoint with a request body of:
```
{
"createDailyRoom": true,
"dailyRoomProperties": { "start_video_off": true },
"body": { "custom_data": "value" }
}
```
The `body` information is parsed and used in the application. The
`dailyRoomProperties` are currently not handled.
- Added detailed latency logging to `UserBotLatencyLogObserver`, capturing
average response time between user stop and bot start, as well as minimum and
maximum response latency.
- Added Chinese, Japanese, Korean word timestamp support to
`CartesiaTTSService`.
- Added `region` parameter to `GladiaSTTService`. Accepted values: eu-west
(default), us-west.
### Changed
- System frames are now queued. Before, system frames could be generated from
any task and would not guarantee any order which was causing undesired
behavior. Also, it was possible to get into some rare recursion issues because
of the way system frames were executed (they were executed in-place, meaning
calling `push_frame()` would finish after the system frame traversed all the
pipeline). This makes system frames more deterministic.
- Changed the default model for both `ElevenLabsTTSService` and
`ElevenLabsHttpTTSService` to `eleven_turbo_v2_5`. The rationale for this
change is that the Turbo v2.5 model exhibits the most stable voice quality
along with very low latency TTFB; latencies are on par with the Flash v2.5
model. Also, the Turbo v2.5 model outputs word/timestamp alignment data with
correct spacing.
- The development runner's `/connect` and `/start` endpoints now both return
`dailyRoom` and `dailyToken` in place of the previous `room_url` and `token`.
- Updated the `pipecat.runner.daily` utility to only take `DAILY_API_URL` and
`DAILY_SAMPLE_ROOM_URL` environment variables instead of argparsing `-u` and
`-k`, respectively.
- Updated `daily-python` to 0.19.6.
- Changed `TavusVideoService` to send audio or video frames only after the
transport is ready, preventing warning messages at startup.
- The development runner now strips any provided protocol (e.g. https://) from
the proxy address and issues a warning. It also strips trailing `/`.
### Deprecated
- In the `pipecat.runner.daily`, the `configure_with_args()` function is
deprecated. Use the `configure()` function instead.
- The development runner's `/connect` endpoint is deprecated and will be
removed in a future version. Use the `/start` endpoint in its place. In the
meantime, both endpoints work and deliver equivalent functionality.
### Fixed
- Fixed a `DailyTransport` issue that would result in an unhandled
`concurrent.futures.CancelledError` when a future is cancelled.
- Fixed a `RivaSTTService` issue that would result in an unhandled
`concurrent.futures.CancelledError` when a future is cancelled when reading
from the audio chunks from the incoming audio stream.
- Fixed an issue in the `BaseOutputTransport`, mainly reproducible with
`FastAPIWebsocketOutputTransport` when the audio mixer was enabled, where the
loop could consume 100% CPU by continuously returning without delay, preventing
other asyncio tasks (such as cancellation or shutdown signals) from being
processed.
- Fixed an issue where `BotStartedSpeakingFrame` and `BotStoppedSpeakingFrame`
were not emitted when using `TavusVideoService` or `HeyGenVideoService`.
- Fixed an issue in `LiveKitTransport` where empty `AudioRawFrame`s were pushed
down the pipeline. This resulted in warnings by the STT processor.
- Fixed `PiperTTSService` to send text as a JSON object in the request body,
@@ -32,6 +586,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Fixed an issue in `TaskObserver` (a proxy to all observers) that was degrading
global performance.
### Other
- Added `07aa-interruptible-soniox.py`, `07ab-interruptible-inworld-http.py`,
`07ac-interruptible-asyncai.py` and `07ac-interruptible-asyncai-http.py`
release evals.
## [0.0.77] - 2025-07-31
### Added

View File

@@ -31,6 +31,23 @@ git push origin your-branch-name
Our maintainers will review your PR, and once everything is good, your contributions will be merged!
## Dependency Management
This project uses [uv](https://docs.astral.sh/uv/) for dependency management. The `uv.lock` file is committed to ensure reproducible builds.
### Adding or Updating Dependencies
1. Edit `pyproject.toml` to add/update dependencies
2. Run `uv lock` to update the lockfile with new dependency resolution
3. Run `uv sync` to install the updated dependencies locally
4. Always commit both files together:
```bash
git add pyproject.toml uv.lock
git commit -m "feat: add new dependency for feature X"
```
**Important:** Never manually edit `uv.lock`. It's auto-generated by `uv lock`.
## Code Style and Documentation
### Python Code Style

116
README.md
View File

@@ -54,12 +54,12 @@ You can connect to Pipecat from any platform using our official SDKs:
| Category | Services |
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova) [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) |
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
| Serializers | [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx) |
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
| Video | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) |
@@ -69,80 +69,106 @@ You can connect to Pipecat from any platform using our official SDKs:
## ⚡ Getting started
You can get started with Pipecat running on your local machine, then move your agent processes to the cloud when you’re ready.
You can get started with Pipecat running on your local machine, then move your agent processes to the cloud when you're ready.
```shell
# Install the module
pip install pipecat-ai
1. Install uv
# Set up your environment
cp dot-env.template .env
```
```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
```
To keep things lightweight, only the core framework is included by default. If you need support for third-party AI services, you can add the necessary dependencies with:
> **Need help?** Refer to the [uv install documentation](https://docs.astral.sh/uv/getting-started/installation/).
```shell
pip install "pipecat-ai[option,...]"
```
2. Install the module
```bash
# For new projects
uv init my-pipecat-app
cd my-pipecat-app
uv add pipecat-ai
# Or for existing projects
uv add pipecat-ai
```
3. Set up your environment
```bash
cp env.example .env
```
4. To keep things lightweight, only the core framework is included by default. If you need support for third-party AI services, you can add the necessary dependencies with:
```bash
uv add "pipecat-ai[option,...]"
```
> **Using pip?** You can still use `pip install pipecat-ai` and `pip install "pipecat-ai[option,...]"` to get set up.
## 🧪 Code examples
- [Foundational](https://github.com/pipecat-ai/pipecat/tree/main/examples/foundational) — small snippets that build on each other, introducing one or two concepts at a time
- [Example apps](https://github.com/pipecat-ai/pipecat-examples) — complete applications that you can use as starting points for development
## 🛠️ Hacking on the framework itself
## 🛠️ Contributing to the framework
1. Set up a virtual environment before following these instructions. From the root of the repo:
### Prerequisites
```shell
python3 -m venv venv
source venv/bin/activate
**Minimum Python Version:** 3.10
**Recommended Python Version:** 3.12
### Setup Steps
1. Clone the repository and navigate to it:
```bash
git clone https://github.com/pipecat-ai/pipecat.git
cd pipecat
```
2. Install the development dependencies:
2. Install development and testing dependencies:
```shell
pip install -r dev-requirements.txt
```bash
uv sync --group dev --all-extras --no-extra gstreamer --no-extra krisp --no-extra local
```
3. Install the git pre-commit hooks (these help ensure your code follows project rules):
3. Install the git pre-commit hooks:
```shell
pre-commit install
```bash
uv run pre-commit install
```
4. Install the `pipecat-ai` package locally in editable mode:
### Python 3.13+ Compatibility
```shell
pip install -e .
```
Some features require PyTorch, which doesn't yet support Python 3.13+. Install using:
> The `-e` or `--editable` option allows you to modify the code without reinstalling.
```bash
uv sync --group dev --all-extras \
--no-extra gstreamer \
--no-extra krisp \
--no-extra local \
--no-extra local-smart-turn \
--no-extra mlx-whisper \
--no-extra moondream \
--no-extra ultravox
```
5. Include optional dependencies as needed. For example:
> **Tip:** For full compatibility, use Python 3.12: `uv python pin 3.12`
```shell
pip install -e ".[daily,deepgram,cartesia,openai,silero]"
```
6. (Optional) If you want to use this package from another directory:
```shell
pip install "path_to_this_repo[option,...]"
```
> **Note**: Some extras (local, gstreamer) require system dependencies. See documentation if you encounter build errors.
### Running tests
Install the test dependencies:
To run all tests, from the root directory:
```shell
pip install -r test-requirements.txt
```bash
uv run pytest
```
From the root directory, run:
Run a specific test suite:
```shell
pytest
```bash
uv run pytest tests/test_name.py
```
### Setting up your editor

View File

@@ -1,13 +0,0 @@
build~=1.2.2
coverage~=7.9.1
grpcio-tools~=1.67.1
pip-tools~=7.4.1
pre-commit~=4.2.0
pyright~=1.1.402
pytest~=8.4.1
pytest-asyncio~=1.0.0
pytest-aiohttp==1.1.0
ruff~=0.12.1
setuptools~=78.1.1
setuptools_scm~=8.3.1
python-dotenv~=1.1.1

View File

@@ -1,10 +0,0 @@
# Pipecat Docs
## [Architecture Overview](architecture.md)
Learn about the thinking behind the framework's design.
## [A Frame's Progress](frame-progress.md)
See how a Frame is processed through a Transport, a Pipeline, and a series of Frame Processors.

View File

@@ -1,17 +1,27 @@
#!/bin/bash
# Check if sphinx-build is installed
if ! command -v sphinx-build &> /dev/null; then
echo "Error: sphinx-build is not installed or not in PATH" >&2
echo "Please install Sphinx using: pip install -r requirements.txt" >&2
# Build docs using uv
echo "Installing dependencies with uv..."
uv sync --group docs --all-extras --no-extra krisp --no-extra gstreamer --no-extra ultravox --no-extra local_smart_turn --no-extra moondream --no-extra riva --no-extra mlx-whisper
# Check if sphinx-build is available
if ! uv run sphinx-build --version &> /dev/null; then
echo "Error: sphinx-build is not available" >&2
exit 1
fi
# Clean previous build
rm -rf _build
echo "Building documentation..."
# Build docs matching ReadTheDocs configuration
sphinx-build -b html -d _build/doctrees . _build/html -W --keep-going
uv run sphinx-build -b html -d _build/doctrees . _build/html -W --keep-going
# Open docs (MacOS)
open _build/html/index.html
if [ $? -eq 0 ]; then
echo "Documentation built successfully!"
# Open docs (MacOS)
open _build/html/index.html
else
echo "Documentation build failed!" >&2
exit 1
fi

View File

@@ -1,4 +1,5 @@
import logging
import os
import sys
from datetime import datetime
from pathlib import Path
@@ -28,6 +29,7 @@ extensions = [
suppress_warnings = [
"autodoc.mocked_object",
"toc.not_included",
]
# Napoleon settings
@@ -45,85 +47,40 @@ autodoc_default_options = {
# Mock imports for optional dependencies
autodoc_mock_imports = [
"riva",
"livekit",
"pyht", # Base PlayHT package
"pyht.async_client", # PlayHT specific imports
"pyht.client",
"pyht.protos",
"pyht.protos.api_pb2",
"pipecat_ai_playht", # PlayHT wrapper
"aiortc",
"aiortc.mediastreams",
"cv2",
"av",
"pyneuphonic",
"mem0",
"mlx_whisper",
"anthropic",
"assemblyai",
"boto3",
"azure",
"cartesia",
"deepgram",
"elevenlabs",
"fal",
"gladia",
"google",
"krisp",
"langchain",
"lmnt",
"noisereduce",
"openpipe",
"simli",
"soundfile",
"soniox",
# Krisp - has build issues on some platforms
"pipecat_ai_krisp",
"pyaudio",
"krisp",
# System-specific GUI libraries
"_tkinter",
"tkinter",
"daily",
"daily_python",
# Moondream dependencies
"torch",
"transformers",
"intel_extension_for_pytorch",
# Ultravox dependencies
"huggingface_hub",
# Platform-specific audio libraries (if needed)
"gi",
"gi.require_version",
"gi.repository",
# OpenCV - sometimes has import issues during docs build
"cv2",
# Heavy ML packages excluded from ReadTheDocs
# ultravox dependencies
"vllm",
"vllm.engine.arg_utils",
# local-smart-turn dependencies
"coremltools",
"coremltools.models",
"coremltools.models.MLModel",
"torch",
"torch.nn",
"torch.nn.functional",
"torchaudio",
# moondream dependencies
"transformers",
"transformers.AutoTokenizer",
# Langchain dependencies
"langchain_core",
"langchain_core.messages",
"langchain_core.runnables",
"langchain_core.messages.AIMessageChunk",
"langchain_core.runnables.Runnable",
# LiveKit dependencies
"livekit",
"livekit.rtc",
"livekit_api",
"livekit_protocol",
"tenacity",
"tenacity.retry",
"tenacity.stop_after_attempt",
"tenacity.wait_exponential",
"rtc",
"rtc.Room",
"rtc.RoomOptions",
"rtc.AudioSource",
"rtc.LocalAudioTrack",
"rtc.TrackPublishOptions",
"rtc.TrackSource",
"rtc.AudioStream",
"rtc.AudioFrameEvent",
"rtc.AudioFrame",
"rtc.Track",
"rtc.TrackKind",
"rtc.RemoteParticipant",
"rtc.RemoteTrackPublication",
"rtc.DataPacket",
# Riva dependencies
"transformers.AutoFeatureExtractor",
"AutoFeatureExtractor",
"timm",
"einops",
"intel_extension_for_pytorch",
"huggingface_hub",
# riva dependencies
"riva",
"riva.client",
"riva.client.Auth",
@@ -133,57 +90,14 @@ autodoc_mock_imports = [
"riva.client.AudioEncoding",
"riva.client.proto.riva_tts_pb2",
"riva.client.SpeechSynthesisService",
# Local CoreML Smart Turn dependencies
"coremltools",
"coremltools.models",
"coremltools.models.MLModel",
"torch",
"torch.nn",
"torch.nn.functional",
"transformers",
"transformers.AutoFeatureExtractor",
# Also add specific classes that are imported
"AutoFeatureExtractor",
# Sentry dependencies
"sentry_sdk",
# AWS Nova Sonic dependencies
"aws_sdk_bedrock_runtime",
"aws_sdk_bedrock_runtime.client",
"aws_sdk_bedrock_runtime.config",
"aws_sdk_bedrock_runtime.models",
"smithy_aws_core",
"smithy_aws_core.credentials_resolvers",
"smithy_aws_core.credentials_resolvers.static",
"smithy_aws_core.identity",
"smithy_core",
"smithy_core.aio",
"smithy_core.aio.eventstream",
# MCP dependencies (you may already have these)
"mcp",
"mcp.client",
"mcp.client.session_group",
"mcp.client.sse",
"mcp.client.stdio",
"mcp.ClientSession",
"mcp.StdioServerParameters",
# gstreamer
"gi",
"gi.require_version",
"gi.repository",
# Protobuf mocks
"pipecat.frames.protobufs.frames_pb2",
"pipecat.serializers.protobuf",
"google.protobuf",
"google.protobuf.descriptor",
"google.protobuf.descriptor_pool",
"google.protobuf.runtime_version",
"google.protobuf.symbol_database",
"google.protobuf.internal.builder",
# MLX dependencies (Apple Silicon specific)
"mlx",
"mlx_whisper", # Note: might need underscore format too
]
# HTML output settings
html_theme = "sphinx_rtd_theme"
html_static_path = ["_static"]
html_static_path = ["_static"] if os.path.exists("_static") else []
autodoc_typehints = "signature" # Show type hints in the signature only, not in the docstring
html_show_sphinx = False

View File

@@ -14,13 +14,14 @@ Quick Links
* `Join our Community <https://discord.gg/pipecat>`_
.. toctree::
:maxdepth: 3
:maxdepth: 2
:caption: API Reference
:hidden:
Adapters <api/pipecat.adapters>
Audio <api/pipecat.audio>
Clocks <api/pipecat.clocks>
Extensions <api/pipecat.extensions>
Frames <api/pipecat.frames>
Metrics <api/pipecat.metrics>
Observers <api/pipecat.observers>

View File

@@ -1,59 +0,0 @@
# Sphinx dependencies
sphinx>=8.1.3
sphinx-rtd-theme
sphinx-markdown-builder
sphinx-autodoc-typehints
toml
# Install all extras individually to ensure they're properly resolved
pipecat-ai[anthropic]
pipecat-ai[assemblyai]
pipecat-ai[asyncai]
pipecat-ai[aws]
pipecat-ai[azure]
pipecat-ai[cartesia]
pipecat-ai[cerebras]
pipecat-ai[deepseek]
pipecat-ai[daily]
pipecat-ai[deepgram]
pipecat-ai[elevenlabs]
pipecat-ai[fal]
pipecat-ai[fireworks]
pipecat-ai[fish]
pipecat-ai[gladia]
pipecat-ai[google]
pipecat-ai[grok]
pipecat-ai[groq]
pipecat-ai[inworld]
# pipecat-ai[krisp] # Mocked
pipecat-ai[koala]
# pipecat-ai[langchain] # Mocked
# pipecat-ai[livekit] # Mocked
pipecat-ai[lmnt]
pipecat-ai[local]
# pipecat-ai[local-smart-turn] # Mocked
# pipecat-ai[mem0] # Mocked
# pipecat-ai[mlx-whisper] # Mocked
# pipecat-ai[moondream] # Mocked
pipecat-ai[nim]
# pipecat-ai[neuphonic] # Mocked
pipecat-ai[noisereduce]
pipecat-ai[openai]
# pipecat-ai[openpipe]
# pipecat-ai[playht] # Mocked due to grpcio conflict with riva
pipecat-ai[qwen]
pipecat-ai[remote-smart-turn]
# pipecat-ai[riva] # Mocked
pipecat-ai[runner]
pipecat-ai[sambanova]
pipecat-ai[silero]
pipecat-ai[simli]
pipecat-ai[soundfile]
pipecat-ai[soniox]
pipecat-ai[speechmatics]
pipecat-ai[tavus]
pipecat-ai[together]
# pipecat-ai[ultravox] # Mocked
# pipecat-ai[webrtc] # Mocked
pipecat-ai[websocket]
pipecat-ai[whisper]

View File

@@ -1,17 +0,0 @@
# Pipecat architecture guide
## Frames
Frames can represent discrete chunks of data, for instance a chunk of text, a chunk of audio, or an image. They can also be used as control flow, for instance a frame that indicates that there is no more data available, or that a user started or stopped talking. They can also represent more complex data structures, such as a message array used for an LLM completion.
## FrameProcessors
Frame processors operate on frames. Every frame processor implements a `process_frame` method that consumes one frame and produces zero or more frames. Frame processors can do simple transforms, such as concatenating text fragments into sentences, or they can treat frames as input for an AI Service, and emit chat completions based on message arrays or transform text into audio or images.
## Pipelines
Pipelines are lists of frame processors linked together. Frame processors can push frames upstream or downstream to their peers. A very simple pipeline might chain an LLM frame processor to a text-to-speech frame processor, with a transport as an output.
## Transports
Transports provide input and output frame processors to receive or send frames respectively. For example, the `DailyTransport` does this with a WebRTC session joined to a Daily.co room.

View File

@@ -1,46 +0,0 @@
# A Frame's Progress
1. A user says “Hello, LLM” and the cloud transcription service delivers a transcription to the Transport.
![A transcript frame arrives](images/frame-progress-01.png)
2. The Transport places a Transcription frame in the Pipeline’s source queue.
![Frame in source queue](images/frame-progress-02.png)
3. The Pipeline passes the Transcription frame to the first Frame Processor in its list, the LLM User Message Aggregator.
![To UMA](images/frame-progress-03.png)
4. The LLM User Message Aggregator updates the LLM Context with a `{“user”: “Hello LLM”}` message.
![Update context](images/frame-progress-04.png)
5. The LLM User Message Aggregator yields an LLM Message Frame, containing the updated LLM Context. The Pipeline passes this frame to the LLM Frame Processor.
![Update context](images/frame-progress-05.png)
6. The LLM Frame Processor creates a streaming chat completion based on the LLM context and yields the first chunk of a response, Text Frame with the value “Hi, “. The Pipeline passes this frame to the TTS Frame Processor. The TTS Frame Processor aggregates this response but doesn’t yield anything, yet, because it’s waiting for a full sentence.
![LLM yields Text](images/frame-progress-06.png)
7. The LLM Frame Processor yields another Text Frame with the value “there.”. The Pipeline passes this frame to the TTS Frame Processor.
![LLM yields more Text](images/frame-progress-07.png)
8. The TTS Frame Processor now has a full sentence, so it starts streaming audio based on “Hi, there.” It yields the first chunk of streaming audio as an Audio frame, which the Pipeline passes to the LLM Assistant Message Aggregator.
![TTS yields Audio](images/frame-progress-08.png)
9. The LLM Assistant Message Aggregator doesn’t do anything with Audio frames, so it immediately yields the frame, unchanged. This is the convention for all Frame Processors: frames that the processor doesn’t process should be immediately yielded.
![pass-through](images/frame-progress-09.png)
10. The Pipeline places the first Audio frame in its sink queue, which is being watched by the Transport. Since the frame is now in a queue, the Pipeline can continue processing other frames. Note that the source and sink queues form a sort of “boundary of concurrent processing” between a Pipeline and the outside world. In a Pipeline, Frames are processed sequentially; once a Frame is on a queue it can be processed in parallel with the frames being processed by the Pipeline. TODO: link to a more in-depth section about this.
![sink queue](images/frame-progress-10.png)
11. The TTS Frame Processor yields another Audio frame as the Transport transmits the first Audio frame.
![parallel audio](images/frame-progress-11.png)
12. As before, the LLM Assistant Message Aggregator immediately yields the Audio frame and the Pipeline places the Audio frame in the sink queue.
![sink queue 2](images/frame-progress-12.png)
13. The TTS Frame Processor has no more frames to yield. The LLM Frame Processor emits an LLM Response End Frame, which the Pipeline passes to the TTS Frame Processor.
![response end](images/frame-progress-13.png)
14. The TTS Frame Processor immediately yields the LLM Response End Frame, so the Pipeline passes it along to the LLM Assistant Message Aggregator. The LLM Assistant Message Aggregator updates the LLM Context with the full response from the LLM. TODO: I realized I forgot that the TTS Frame Processor also yields the Text frames that the LLM emitted so that the LLM Assistant Message Aggregator could accumulate them, arrggh.
![response end](images/frame-progress-14.png)
15. The system is quiet, and waiting for the next message from the Transport.
![response end](images/frame-progress-15.png)

View File

@@ -1,110 +0,0 @@
# Understanding Different Frame Types in the Pipecat System
In the Pipecat system, frames are used to represent different types of data and control signals that flow through the pipeline. Understanding these frame types is crucial for working with the system effectively. This tutorial will cover the main categories of frames and their specific uses.
## 1. Base Frame Classes
### Frame
The `Frame` class is the base class for all frames. It includes:
- `id`: A unique identifier
- `name`: A descriptive name
- `pts`: Presentation timestamp (optional)
### DataFrame
`DataFrame` is a subclass of `Frame` and serves as a base for most data-carrying frames.
## 2. Audio Frames
### AudioRawFrame
Represents a chunk of audio with properties:
- `audio`: Raw audio data
- `sample_rate`: Audio sample rate
- `num_channels`: Number of audio channels
Subclasses include:
- `InputAudioRawFrame`: For audio from input sources
- `OutputAudioRawFrame`: For audio to be played by output devices
- `TTSAudioRawFrame`: For audio generated by Text-to-Speech services
## 3. Image Frames
### ImageRawFrame
Represents an image with properties:
- `image`: Raw image data
- `size`: Image dimensions
- `format`: Image format (e.g., JPEG, PNG)
Subclasses include:
- `InputImageRawFrame`: For images from input sources
- `OutputImageRawFrame`: For images to be displayed
- `UserImageRawFrame`: For images associated with a specific user
- `VisionImageRawFrame`: For images with associated text for description
- `URLImageRawFrame`: For images with an associated URL
### SpriteFrame
Represents an animated sprite, containing a list of `ImageRawFrame` objects.
## 4. Text and Transcription Frames
### TextFrame
Represents a chunk of text, used for various purposes in the pipeline.
### TranscriptionFrame
A specialized `TextFrame` for speech transcriptions, including:
- `user_id`: ID of the speaking user
- `timestamp`: When the transcription was generated
- `language`: Detected language of the speech
### InterimTranscriptionFrame
Similar to `TranscriptionFrame`, but for interim (not final) transcriptions.
## 5. LLM (Language Model) Frames
### LLMMessagesFrame
Contains a list of messages for an LLM service to process.
### LLMMessagesAppendFrame and LLMMessagesUpdateFrame
Used to modify the current context of LLM messages.
### LLMSetToolsFrame
Specifies tools (functions) available for the LLM to use.
### LLMEnablePromptCachingFrame
Controls prompt caching in certain LLMs.
## 6. System and Control Frames
### SystemFrame
Base class for system-level frames.
Important system frames include:
- `StartFrame`: Initiates a pipeline
- `CancelFrame`: Stops a pipeline immediately
- `ErrorFrame`: Notifies of errors (with `FatalErrorFrame` for unrecoverable errors)
- `EndTaskFrame` and `CancelTaskFrame`: Control pipeline tasks
- `StartInterruptionFrame` and `StopInterruptionFrame`: Indicate user speech for interruptions
### ControlFrame
Base class for control-flow frames.
Notable control frames:
- `EndFrame`: Signals the end of a pipeline
- `LLMFullResponseStartFrame` and `LLMFullResponseEndFrame`: Bracket LLM responses
- `UserStartedSpeakingFrame` and `UserStoppedSpeakingFrame`: Indicate user speech activity
- `BotStartedSpeakingFrame` and `BotStoppedSpeakingFrame`: Indicate bot speech activity
- `TTSStartedFrame` and `TTSStoppedFrame`: Bracket Text-to-Speech responses
## 7. Special Purpose Frames
### MetricsFrame
Contains performance metrics data.
### FunctionCallInProgressFrame and FunctionCallResultFrame
Used for handling LLM function (tool) calls.
### ServiceUpdateSettingsFrame
Base class for updating service settings, with specific subclasses for LLM, TTS, and STT services.
## Conclusion
Understanding these frame types is essential for working with the Pipecat system. Each frame type serves a specific purpose in the pipeline, whether it's carrying data (like audio or images), controlling the flow of the pipeline, or managing system-level operations. By using the appropriate frame types, you can effectively process and transmit various kinds of information through your pipeline.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 98 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 91 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 92 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 92 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 98 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 94 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 94 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 95 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 94 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 96 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 110 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 102 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 111 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 117 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 98 KiB

View File

@@ -29,6 +29,9 @@ CARTESIA_API_KEY=...
DAILY_API_KEY=...
DAILY_SAMPLE_ROOM_URL=https://...
# Deepgram
DEEPGRAM_API_KEY=...
# ElevenLabs
ELEVENLABS_API_KEY=...
ELEVENLABS_VOICE_ID=...
@@ -44,11 +47,21 @@ FIREWORKS_API_KEY=...
# Gladia
GLADIA_API_KEY=...
GLADIA_REGION=...
# Google
GOOGLE_API_KEY=...
GOOGLE_CLOUD_PROJECT_ID=...
GOOGLE_TEST_CREDENTIALS=...
GOOGLE_VERTEX_TEST_CREDENTIALS=...
# LMNT
LMNT_API_KEY=...
LMNT_VOICE_ID=...
# Perplexity
PERPLEXITY_API_KEY=...
# PlayHT
PLAY_HT_USER_ID=...
PLAY_HT_API_KEY=...
@@ -122,7 +135,6 @@ SONIOX_API_KEY=
# Speechmatics
SPEECHMATICS_API_KEY=...
# SambaNova
SAMBANOVA_API_KEY=...
@@ -130,4 +142,4 @@ SAMBANOVA_API_KEY=...
SENTRY_DSN=...
# Heygen
HEYGEN_API_KEY=...
HEYGEN_API_KEY=...

View File

@@ -34,7 +34,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
# Create an HTTP session
@@ -43,14 +43,17 @@ async def run_bot(transport: BaseTransport):
base_url=os.getenv("PIPER_BASE_URL"), aiohttp_session=session, sample_rate=24000
)
task = PipelineTask(Pipeline([tts, transport.output()]))
task = PipelineTask(
Pipeline([tts, transport.output()]),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
# Register an event handler so we can play the audio when the client joins
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
await task.queue_frames([TTSSpeakFrame(f"Hello there!"), EndFrame()])
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -58,7 +61,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -33,7 +33,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
# Create an HTTP session
@@ -44,14 +44,17 @@ async def run_bot(transport: BaseTransport):
aiohttp_session=session,
)
task = PipelineTask(Pipeline([tts, transport.output()]))
task = PipelineTask(
Pipeline([tts, transport.output()]),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
# Register an event handler so we can play the audio when the client joins
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
await task.queue_frames([TTSSpeakFrame(f"Hello there!"), EndFrame()])
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -59,7 +62,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -33,7 +33,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
tts = CartesiaTTSService(
@@ -41,14 +41,17 @@ async def run_bot(transport: BaseTransport):
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
)
task = PipelineTask(Pipeline([tts, transport.output()]))
task = PipelineTask(
Pipeline([tts, transport.output()]),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
# Register an event handler so we can play the audio when the client joins
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
await task.queue_frames([TTSSpeakFrame(f"Hello there!"), EndFrame()])
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -56,7 +59,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -33,19 +33,22 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
tts = FastPitchTTSService(api_key=os.getenv("NVIDIA_API_KEY"))
task = PipelineTask(Pipeline([tts, transport.output()]))
task = PipelineTask(
Pipeline([tts, transport.output()]),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
# Register an event handler so we can play the audio when the client joins
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
await task.queue_frames([TTSSpeakFrame(f"Hello there!"), EndFrame()])
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -53,7 +56,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -9,10 +9,14 @@ import os
from dotenv import load_dotenv
from loguru import logger
from pipecat.frames.frames import EndFrame, LLMMessagesFrame
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.processors.aggregators.openai_llm_context import (
OpenAILLMContext,
OpenAILLMContextFrame,
)
from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.services.cartesia.tts import CartesiaTTSService
@@ -34,7 +38,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
tts = CartesiaTTSService(
@@ -51,14 +55,17 @@ async def run_bot(transport: BaseTransport):
}
]
task = PipelineTask(Pipeline([llm, tts, transport.output()]))
task = PipelineTask(
Pipeline([llm, tts, transport.output()]),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
# Register an event handler so we can play the audio when the client joins
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
await task.queue_frames([LLMMessagesFrame(messages), EndFrame()])
await task.queue_frames([OpenAILLMContextFrame(OpenAILLMContext(messages)), EndFrame()])
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -66,7 +73,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -40,7 +40,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
# Create an HTTP session
@@ -51,7 +51,10 @@ async def run_bot(transport: BaseTransport):
key=os.getenv("FAL_KEY"),
)
task = PipelineTask(Pipeline([imagegen, transport.output()]))
task = PipelineTask(
Pipeline([imagegen, transport.output()]),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
# Register an event handler so we can play the audio when the client joins
@transport.event_handler("on_client_connected")
@@ -63,7 +66,7 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -71,7 +74,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -39,7 +39,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
imagegen = GoogleImageGenService(
@@ -52,6 +52,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
# Register an event handler so we can play the audio when the client joins
@@ -66,7 +67,7 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -74,7 +75,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -18,6 +18,7 @@ from loguru import logger
from pipecat_ai_small_webrtc_prebuilt.frontend import SmallWebRTCPrebuiltUI
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -103,7 +104,7 @@ async def run_example(webrtc_connection: SmallWebRTCConnection):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):

View File

@@ -13,6 +13,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -86,7 +87,7 @@ async def main():
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):

View File

@@ -9,7 +9,6 @@ import json
import os
import sys
from deepgram import LiveOptions
from dotenv import load_dotenv
from loguru import logger
@@ -51,12 +50,7 @@ async def main():
),
)
stt = DeepgramSTTService(
api_key=os.getenv("DEEPGRAM_API_KEY"),
live_options=LiveOptions(
vad_events=True,
),
)
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
@@ -78,20 +72,20 @@ async def main():
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
runner = PipelineRunner()
pipeline = Pipeline(
[
transport.input(), # Transport user input
stt,
context_aggregator.user(), # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
context_aggregator.assistant(), # Assistant spoken responses
]
)
task = PipelineTask(
Pipeline(
[
transport.input(),
stt,
context_aggregator.user(),
llm,
tts,
transport.output(),
context_aggregator.assistant(),
],
),
pipeline,
params=PipelineParams(
enable_metrics=True,
enable_usage_metrics=True,
@@ -132,6 +126,8 @@ async def main():
],
)
runner = PipelineRunner()
await runner.run(task)

View File

@@ -15,13 +15,16 @@ from pipecat.frames.frames import (
DataFrame,
Frame,
LLMFullResponseStartFrame,
LLMMessagesFrame,
TextFrame,
)
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
from pipecat.pipeline.task import PipelineTask
from pipecat.processors.aggregators.openai_llm_context import (
OpenAILLMContext,
OpenAILLMContextFrame,
)
from pipecat.processors.aggregators.sentence import SentenceAggregator
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.runner.types import RunnerArguments
@@ -83,7 +86,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
"""Run the Calendar Month Narration bot using WebRTC transport.
Args:
@@ -153,9 +156,12 @@ async def run_bot(transport: BaseTransport):
}
]
frames.append(MonthFrame(month=month))
frames.append(LLMMessagesFrame(messages))
frames.append(OpenAILLMContextFrame(OpenAILLMContext(messages)))
task = PipelineTask(pipeline)
task = PipelineTask(
pipeline,
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
# Set up transport event handlers
@transport.event_handler("on_client_connected")
@@ -170,14 +176,14 @@ async def run_bot(transport: BaseTransport):
await task.cancel()
# Run the pipeline
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -15,7 +15,6 @@ from loguru import logger
from pipecat.frames.frames import (
Frame,
LLMMessagesFrame,
OutputAudioRawFrame,
TextFrame,
TTSAudioRawFrame,
@@ -25,6 +24,10 @@ from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
from pipecat.pipeline.task import PipelineTask
from pipecat.processors.aggregators.openai_llm_context import (
OpenAILLMContext,
OpenAILLMContextFrame,
)
from pipecat.processors.aggregators.sentence import SentenceAggregator
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.services.cartesia.tts import CartesiaHttpTTSService
@@ -137,7 +140,7 @@ async def main():
)
task = PipelineTask(pipeline)
await task.queue_frame(LLMMessagesFrame(messages))
await task.queue_frame(OpenAILLMContextFrame(OpenAILLMContext(messages)))
await task.stop_when_done()
await runner.run(task)

View File

@@ -10,7 +10,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import Frame, MetricsFrame
from pipecat.frames.frames import Frame, LLMRunFrame, MetricsFrame
from pipecat.metrics.metrics import (
LLMUsageMetricsData,
ProcessingMetricsData,
@@ -76,7 +76,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
@@ -119,6 +119,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -126,21 +127,21 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -15,6 +15,7 @@ from pipecat.frames.frames import (
BotStartedSpeakingFrame,
BotStoppedSpeakingFrame,
Frame,
LLMRunFrame,
OutputImageRawFrame,
)
from pipecat.pipeline.pipeline import Pipeline
@@ -91,7 +92,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
@@ -137,27 +138,28 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
logger.info(f"Client connected")
# Kick off the conversation.
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -10,6 +10,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -48,7 +49,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
@@ -88,6 +89,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -95,14 +97,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -110,7 +112,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -10,6 +10,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -47,7 +48,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
@@ -87,6 +88,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -94,14 +96,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -109,7 +111,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -9,6 +9,7 @@ import os
from dotenv import load_dotenv
from loguru import logger
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -48,7 +49,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
"""Speechmatics STT Service Example
This example demonstrates using Speechmatics Speech-to-Text service with speaker diarization and intelligent speaker management. Key features:
@@ -146,6 +147,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -153,14 +155,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Say a short hello to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -168,7 +170,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -10,6 +10,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -52,7 +53,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
"""Run example using Speechmatics STT.
This example will use diarization within our STT service and output the words spoken by
@@ -129,6 +130,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -136,14 +138,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Say a short hello to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -151,7 +153,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -11,6 +11,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -45,7 +46,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = SonioxSTTService(
@@ -86,6 +87,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -93,14 +95,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -108,7 +110,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -12,6 +12,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -49,7 +50,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
# Create an HTTP session
@@ -101,6 +102,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -108,14 +110,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -123,7 +125,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -12,6 +12,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -50,7 +51,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
# Create an HTTP session
@@ -93,6 +94,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -100,14 +102,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -115,7 +117,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -11,6 +11,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -49,7 +50,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
@@ -89,6 +90,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -96,14 +98,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -111,7 +113,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -16,15 +16,20 @@ from langchain_openai import ChatOpenAI
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.frames.frames import LLMMessagesUpdateFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_response import (
LLMAssistantResponseAggregator,
LLMUserResponseAggregator,
LLMAssistantContextAggregator,
LLMUserContextAggregator,
)
from pipecat.processors.aggregators.openai_llm_context import (
OpenAILLMContext,
)
from pipecat.processors.frameworks.langchain import LangchainProcessor
from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.transports.base_transport import BaseTransport, TransportParams
@@ -65,7 +70,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
@@ -95,8 +100,9 @@ async def run_bot(transport: BaseTransport):
)
lc = LangchainProcessor(history_chain)
tma_in = LLMUserResponseAggregator()
tma_out = LLMAssistantResponseAggregator()
context = OpenAILLMContext()
tma_in = LLMUserContextAggregator(context=context)
tma_out = LLMAssistantContextAggregator(context=context)
pipeline = Pipeline(
[
@@ -116,24 +122,25 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
logger.info(f"Client connected")
# Kick off the conversation.
# the `LLMMessagesFrame` will be picked up by the LangchainProcessor using
# An `OpenAILLMContextFrame` will be picked up by the LangchainProcessor using
# only the content of the last message to inject it in the prompt defined
# above. So no role is required here.
messages = [({"content": "Please briefly introduce yourself to the user."})]
await task.queue_frames([LLMMessagesFrame(messages)])
await task.queue_frames([LLMMessagesUpdateFrame(messages, run_llm=True)])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -141,7 +148,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -12,8 +12,8 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.frames.frames import (
BotInterruptionFrame,
StopInterruptionFrame,
LLMRunFrame,
StartInterruptionFrame,
UserStartedSpeakingFrame,
UserStoppedSpeakingFrame,
)
@@ -52,7 +52,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = DeepgramSTTService(
@@ -92,29 +92,30 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@stt.event_handler("on_speech_started")
async def on_speech_started(stt, *args, **kwargs):
await task.queue_frames([BotInterruptionFrame(), UserStartedSpeakingFrame()])
await task.queue_frames([StartInterruptionFrame(), UserStartedSpeakingFrame()])
@stt.event_handler("on_utterance_end")
async def on_utterance_end(stt, *args, **kwargs):
await task.queue_frames([StopInterruptionFrame(), UserStoppedSpeakingFrame()])
await task.queue_frames([UserStoppedSpeakingFrame()])
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -122,7 +123,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -11,6 +11,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -49,7 +50,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
@@ -86,6 +87,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -93,14 +95,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -108,7 +110,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -12,6 +12,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -50,7 +51,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
# Create an HTTP session
@@ -93,6 +94,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -100,14 +102,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -115,7 +117,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -11,6 +11,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -49,7 +50,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
@@ -89,6 +90,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -96,14 +98,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -111,7 +113,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -11,6 +11,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -48,7 +49,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
@@ -89,6 +90,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -96,14 +98,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -111,7 +113,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -11,6 +11,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -49,7 +50,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
@@ -91,6 +92,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -98,14 +100,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -113,7 +115,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -11,6 +11,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -48,7 +49,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = AzureSTTService(
@@ -95,6 +96,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -102,14 +104,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -117,7 +119,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -11,6 +11,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -48,7 +49,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = OpenAISTTService(
@@ -90,6 +91,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -97,14 +99,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -112,7 +114,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -12,6 +12,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -49,7 +50,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
@@ -94,6 +95,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -101,14 +103,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -116,7 +118,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -12,6 +12,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -49,7 +50,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
# Create an HTTP session
@@ -92,6 +93,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -99,14 +101,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -114,7 +116,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -11,6 +11,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -50,11 +51,12 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = GladiaSTTService(
api_key=os.getenv("GLADIA_API_KEY", ""),
region=os.getenv("GLADIA_REGION"),
params=GladiaInputParams(
language_config=LanguageConfig(
languages=[Language.EN],
@@ -97,6 +99,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -104,21 +107,21 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -11,6 +11,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -48,7 +49,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
@@ -85,6 +86,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -92,14 +94,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -107,7 +109,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -11,6 +11,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -49,7 +50,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = GroqSTTService(api_key=os.getenv("GROQ_API_KEY"))
@@ -90,6 +91,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -97,14 +99,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -112,7 +114,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -9,6 +9,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -46,7 +47,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = AWSTranscribeSTTService()
@@ -91,6 +92,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -98,14 +100,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "user", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -113,7 +115,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -0,0 +1,164 @@
#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
"""
A conversational AI bot using Gemini for both LLM and TTS.
This example demonstrates how to use Gemini's TTS capabilities with the new
GeminiTTSService, which uses Gemini's TTS-specific models instead of Google Cloud TTS.
Features showcased:
- Gemini LLM for conversation
- Gemini TTS with natural voice control
- Support for different voice personalities
- Style and tone control through natural language prompts
Run with:
python examples/foundational/gemini-tts.py
Make sure to set your environment variables:
export GOOGLE_API_KEY=your_api_key_here
"""
import os
from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.services.google.llm import GoogleLLMService
from pipecat.services.google.stt import GoogleSTTService
from pipecat.services.google.tts import GeminiTTSService
from pipecat.transcriptions.language import Language
from pipecat.transports.base_transport import BaseTransport, TransportParams
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
from pipecat.transports.services.daily import DailyParams
load_dotenv(override=True)
def _audio_vad_options():
    """Return the keyword arguments shared by every transport in this example.

    A fresh ``SileroVADAnalyzer`` is built on each call, so nothing is
    instantiated until one of the factories below is actually invoked.
    """
    return {
        "audio_in_enabled": True,
        "audio_out_enabled": True,
        "vad_analyzer": SileroVADAnalyzer(),
    }


# Maps a transport name to a zero-argument factory for its parameters.
# Factories are stored instead of ready-made objects so that e.g.
# SileroVADAnalyzer does not get instantiated up front; the factory is only
# called once the desired transport gets selected.
transport_params = {
    "daily": lambda: DailyParams(**_audio_vad_options()),
    "twilio": lambda: FastAPIWebsocketParams(**_audio_vad_options()),
    "webrtc": lambda: TransportParams(**_audio_vad_options()),
}
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Assemble and run the Gemini LLM + Gemini TTS voice pipeline.

    Args:
        transport: Transport whose input feeds the pipeline and whose output
            receives the synthesized speech.
        runner_args: Runner settings; supplies the pipeline idle timeout and
            the SIGINT-handling flag for the pipeline runner.
    """
    logger.info("Starting bot with Gemini TTS")

    # The same API key is used for both the LLM and the TTS service.
    gemini_key = os.getenv("GOOGLE_API_KEY")

    speech_to_text = GoogleSTTService(
        params=GoogleSTTService.InputParams(languages=Language.EN_US),
        credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"),
    )
    speech_synth = GeminiTTSService(
        api_key=gemini_key,
        model="gemini-2.5-flash-preview-tts",  # TTS-specific model
        voice_id="Charon",
        params=GeminiTTSService.InputParams(language=Language.EN_US),
    )
    chat_model = GoogleLLMService(api_key=gemini_key, model="gemini-2.5-flash")

    # System message that instructs the AI on how to speak. The continuation
    # lines stay at column 0 so the prompt text is not altered by indentation.
    system_prompt = """You are a helpful AI assistant in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way.
IMPORTANT: Since you're using Gemini TTS which supports natural voice control, you can include speaking instructions in your responses. For example:
- "Say cheerfully: Welcome to our conversation!"
- "Read this in a calm, professional tone: Here are the details you requested."
- "Speak in an excited whisper: I have some great news to share!"
- "Say slowly and clearly: Let me explain this step by step."
Feel free to use natural language instructions to control your voice style, tone, pace, and emotion. The TTS system will interpret these instructions and adjust the speech accordingly.
Your output will be converted to audio, so avoid special characters in your answers. Respond to what the user said in a creative and helpful way."""

    # This list is handed to the context below and is later appended to by the
    # connect handler, so it must remain the same list object.
    conversation = [{"role": "system", "content": system_prompt}]

    aggregators = chat_model.create_context_aggregator(OpenAILLMContext(conversation))

    stages = [
        transport.input(),  # Transport user input
        speech_to_text,  # STT
        aggregators.user(),  # User responses
        chat_model,  # LLM
        speech_synth,  # Gemini TTS
        transport.output(),  # Transport bot output
        aggregators.assistant(),  # Assistant spoken responses
    ]

    bot_task = PipelineTask(
        Pipeline(stages),
        params=PipelineParams(enable_metrics=True, enable_usage_metrics=True),
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Kick off the conversation with a styled introduction
        greeting = {
            "role": "system",
            "content": "Say cheerfully and warmly: Hello! I'm your AI assistant powered by Gemini's new TTS technology. I can speak with different voices, tones, and styles. How can I help you today?",
        }
        conversation.append(greeting)
        await bot_task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await bot_task.cancel()

    await PipelineRunner(handle_sigint=runner_args.handle_sigint).run(bot_task)
async def bot(runner_args: RunnerArguments):
    """Pipecat Cloud-compatible entry point: create the transport, then run the bot.

    Args:
        runner_args: Runner arguments used to create the transport and then
            passed on unchanged to ``run_bot``.
    """
    selected_transport = await create_transport(runner_args, transport_params)
    await run_bot(selected_transport, runner_args)
if __name__ == "__main__":
    # The runner is imported only when this file is executed as a script.
    from pipecat.runner.run import main as run_main

    run_main()

View File

@@ -11,6 +11,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -49,7 +50,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = GoogleSTTService(
@@ -98,6 +99,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -105,14 +107,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -120,7 +122,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -11,6 +11,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -49,7 +50,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = AssemblyAISTTService(
@@ -91,6 +92,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -98,14 +100,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -113,7 +115,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -12,6 +12,7 @@ from loguru import logger
from pipecat.audio.filters.krisp_filter import KrispFilter
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -52,7 +53,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
@@ -89,6 +90,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -96,14 +98,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -111,7 +113,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -12,6 +12,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -50,7 +51,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
# Create an HTTP session
@@ -94,6 +95,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -101,14 +103,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -116,7 +118,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -11,6 +11,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -48,7 +49,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
@@ -88,6 +89,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -95,14 +97,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -110,7 +112,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -11,6 +11,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -48,7 +49,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = RivaSTTService(api_key=os.getenv("NVIDIA_API_KEY"))
@@ -85,6 +86,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -92,14 +94,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -107,7 +109,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -18,6 +18,7 @@ from pipecat.frames.frames import (
InputAudioRawFrame,
LLMFullResponseEndFrame,
LLMFullResponseStartFrame,
LLMRunFrame,
StartInterruptionFrame,
TextFrame,
TranscriptionFrame,
@@ -213,7 +214,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
llm = GoogleLLMService(
@@ -266,6 +267,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -273,14 +275,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -288,7 +290,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -11,6 +11,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -49,7 +50,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
@@ -89,6 +90,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -96,14 +98,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -111,7 +113,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -59,7 +59,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
tts = CartesiaTTSService(
@@ -82,6 +82,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -93,7 +94,7 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -101,7 +102,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -12,6 +12,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -50,7 +51,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
# Create an HTTP session
@@ -93,6 +94,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -100,14 +102,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -115,7 +117,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -11,6 +11,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -48,7 +49,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
@@ -88,6 +89,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -95,14 +97,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -110,7 +112,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -11,6 +11,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -49,7 +50,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = FalSTTService(
@@ -91,6 +92,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -98,14 +100,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -113,7 +115,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -12,6 +12,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -78,7 +79,7 @@ async def main():
)
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
runner = PipelineRunner()

View File

@@ -12,6 +12,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -51,7 +52,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
# Create an HTTP session
@@ -95,6 +96,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -102,14 +104,14 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -117,7 +119,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -0,0 +1,126 @@
#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import os
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.services.sarvam.tts import SarvamHttpTTSService
from pipecat.transcriptions.language import Language
from pipecat.transports.base_transport import BaseTransport, TransportParams
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
from pipecat.transports.services.daily import DailyParams
load_dotenv(override=True)
# Maps a transport name ("daily", "twilio", "webrtc") to a zero-argument
# function that builds that transport's parameters. Functions are stored
# instead of ready-made objects so that nothing (e.g. SileroVADAnalyzer) is
# instantiated up front; only the function for the selected transport is
# called. Every entry enables audio input, audio output and Silero VAD.
transport_params = {
"daily": lambda: DailyParams(
audio_in_enabled=True,
audio_out_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
),
"twilio": lambda: FastAPIWebsocketParams(
audio_in_enabled=True,
audio_out_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
),
"webrtc": lambda: TransportParams(
audio_in_enabled=True,
audio_out_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
),
}
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Run the Deepgram STT -> OpenAI LLM -> Sarvam HTTP TTS voice pipeline.

    Args:
        transport: Transport whose ``input()`` and ``output()`` processors
            sit at the two ends of the pipeline and whose client
            connect/disconnect events drive the conversation.
        runner_args: Runner settings. ``pipeline_idle_timeout_secs`` is
            passed to the pipeline task and ``handle_sigint`` to the runner.
    """
    # Function-scope import: keeps this block self-contained, matching the
    # local-import style already used in this file's ``__main__`` guard.
    from pipecat.frames.frames import LLMRunFrame

    logger.info("Starting bot")

    # The HTTP session is handed to the Sarvam TTS service, so everything up
    # to and including ``runner.run()`` stays inside this ``async with``.
    async with aiohttp.ClientSession() as session:
        stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

        tts = SarvamHttpTTSService(
            api_key=os.getenv("SARVAM_API_KEY"),
            aiohttp_session=session,
            params=SarvamHttpTTSService.InputParams(language=Language.EN),
        )

        llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

        messages = [
            {
                "role": "system",
                "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
            },
        ]

        context = OpenAILLMContext(messages)
        context_aggregator = llm.create_context_aggregator(context)

        pipeline = Pipeline(
            [
                transport.input(),  # Transport user input
                stt,
                context_aggregator.user(),  # User responses
                llm,  # LLM
                tts,  # TTS
                transport.output(),  # Transport bot output
                context_aggregator.assistant(),  # Assistant spoken responses
            ]
        )

        task = PipelineTask(
            pipeline,
            params=PipelineParams(
                enable_metrics=True,
                enable_usage_metrics=True,
            ),
            idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
        )

        @transport.event_handler("on_client_connected")
        async def on_client_connected(transport, client):
            logger.info("Client connected")
            # Kick off the conversation. LLMRunFrame is used here for
            # consistency with the other examples, which trigger the first
            # LLM response the same way.
            messages.append({"role": "system", "content": "Please introduce yourself to the user."})
            await task.queue_frames([LLMRunFrame()])

        @transport.event_handler("on_client_disconnected")
        async def on_client_disconnected(transport, client):
            logger.info("Client disconnected")
            # Cancel the task when the client disconnects.
            await task.cancel()

        runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

        await runner.run(task)
async def bot(runner_args: RunnerArguments):
    """Entry point for the bot; compatible with Pipecat Cloud.

    Creates the transport from ``runner_args`` and the ``transport_params``
    table, then runs the bot on it.
    """
    selected_transport = await create_transport(runner_args, transport_params)
    await run_bot(selected_transport, runner_args)
# Script entry point: when this file is executed directly, delegate to the
# Pipecat runner's main(). The import is done here rather than at module top,
# so it only happens when the file is run as a script.
if __name__ == "__main__":
from pipecat.runner.run import main
main()

View File

@@ -5,6 +5,7 @@
#
import asyncio
import os
import aiohttp
@@ -12,6 +13,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -21,7 +23,6 @@ from pipecat.runner.utils import create_transport
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.services.sarvam.tts import SarvamTTSService
from pipecat.transcriptions.language import Language
from pipecat.transports.base_transport import BaseTransport, TransportParams
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
from pipecat.transports.services.daily import DailyParams
@@ -51,72 +52,73 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
# Create an HTTP session
async with aiohttp.ClientSession() as session:
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
tts = SarvamTTSService(
api_key=os.getenv("SARVAM_API_KEY"),
aiohttp_session=session,
params=SarvamTTSService.InputParams(language=Language.EN),
)
tts = SarvamTTSService(
api_key=os.getenv("SARVAM_API_KEY"),
model="bulbul:v2",
voice_id="manisha",
)
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
},
]
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
},
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
pipeline = Pipeline(
[
transport.input(), # Transport user input
stt,
context_aggregator.user(), # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
context_aggregator.assistant(), # Assistant spoken responses
]
)
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
task = PipelineTask(
pipeline,
params=PipelineParams(
enable_metrics=True,
enable_usage_metrics=True,
),
)
pipeline = Pipeline(
[
transport.input(), # Transport user input
stt,
context_aggregator.user(), # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
context_aggregator.assistant(), # Assistant spoken responses
]
)
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([LLMRunFrame()])
task = PipelineTask(
pipeline,
params=PipelineParams(
enable_metrics=True,
enable_usage_metrics=True,
),
)
# Optionally, you can wait for 30 seconds and then change the voice.
# await asyncio.sleep(30)
# await task.queue_frame(TTSUpdateSettingsFrame(settings={"voice": "anushka"}))
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
runner = PipelineRunner(handle_sigint=False)
await runner.run(task)
await runner.run(task)
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -6,9 +6,13 @@ from typing import Tuple
import aiohttp
from dotenv import load_dotenv
from pipecat.frames.frames import AudioFrame, EndFrame, ImageFrame, LLMMessagesFrame, TextFrame
from pipecat.frames.frames import AudioFrame, EndFrame, ImageFrame, TextFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.processors.aggregators import SentenceAggregator
from pipecat.processors.aggregators.openai_llm_context import (
OpenAILLMContext,
OpenAILLMContextFrame,
)
from pipecat.runner.daily import configure
from pipecat.services.azure import AzureLLMService, AzureTTSService
from pipecat.services.elevenlabs import ElevenLabsTTSService
@@ -79,7 +83,7 @@ async def main():
sentence_aggregator = SentenceAggregator()
pipeline = Pipeline([llm, sentence_aggregator, tts1], source_queue, sink_queue)
await source_queue.put(LLMMessagesFrame(messages))
await source_queue.put(OpenAILLMContextFrame(OpenAILLMContext(messages)))
await source_queue.put(EndFrame())
await pipeline.run_pipeline()

View File

@@ -72,7 +72,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
pipeline = Pipeline([transport.input(), MirrorProcessor(), transport.output()])
@@ -80,6 +80,7 @@ async def run_bot(transport: BaseTransport):
task = PipelineTask(
pipeline,
params=PipelineParams(),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -91,7 +92,7 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -99,7 +100,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -75,7 +75,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
tk_root = tk.Tk()
@@ -97,6 +97,7 @@ async def run_bot(transport: BaseTransport):
task = PipelineTask(
pipeline,
params=PipelineParams(),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
async def run_tk():
@@ -115,7 +116,7 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await asyncio.gather(runner.run(task), run_tk())
@@ -123,7 +124,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -49,7 +49,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
@@ -92,6 +92,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -105,7 +106,7 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -113,7 +114,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -101,7 +101,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
@@ -143,7 +143,10 @@ async def run_bot(transport: BaseTransport):
]
)
task = PipelineTask(pipeline)
task = PipelineTask(
pipeline,
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
@@ -157,7 +160,7 @@ async def run_bot(transport: BaseTransport):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -165,7 +168,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -11,7 +11,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
from pipecat.frames.frames import Frame, TextFrame, TTSSpeakFrame, UserImageRequestFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
@@ -70,7 +70,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
user_response = UserResponseAggregator()
@@ -103,7 +103,10 @@ async def run_bot(transport: BaseTransport):
]
)
task = PipelineTask(pipeline)
task = PipelineTask(
pipeline,
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
@@ -116,14 +119,14 @@ async def run_bot(transport: BaseTransport):
image_requester.set_participant_id(client_id)
# Welcome message
await tts.say("Hi there! Feel free to ask me what I see.")
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me what I see."))
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -131,7 +134,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -11,7 +11,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
from pipecat.frames.frames import Frame, TextFrame, TTSSpeakFrame, UserImageRequestFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -70,7 +70,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
user_response = UserResponseAggregator()
@@ -109,6 +109,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -122,14 +123,14 @@ async def run_bot(transport: BaseTransport):
image_requester.set_participant_id(client_id)
# Welcome message
await tts.say("Hi there! Feel free to ask me what I see.")
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me what I see."))
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -137,7 +138,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

View File

@@ -11,7 +11,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
from pipecat.frames.frames import Frame, TextFrame, TTSSpeakFrame, UserImageRequestFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -70,7 +70,7 @@ transport_params = {
}
async def run_bot(transport: BaseTransport):
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
user_response = UserResponseAggregator()
@@ -109,6 +109,7 @@ async def run_bot(transport: BaseTransport):
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
@@ -122,14 +123,14 @@ async def run_bot(transport: BaseTransport):
image_requester.set_participant_id(client_id)
# Welcome message
await tts.say("Hi there! Feel free to ask me what I see.")
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me what I see."))
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
@@ -137,7 +138,7 @@ async def run_bot(transport: BaseTransport):
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport)
await run_bot(transport, runner_args)
if __name__ == "__main__":

Some files were not shown because too many files have changed in this diff Show More