Make the temporary build script more flexible for cross-architecture builds

Merge pull request #2504 from pipecat-ai/aleix/cartesia-fix-timeout-reconnection
CartesiaTTSService: reconnect on Cartesia's timeout
2025-08-26 09:05:21 -04:00 · 2025-08-25 15:24:31 -07:00 · 2025-08-25 14:09:03 -07:00 · 2025-08-25 09:28:21 -07:00 · 2025-08-25 08:22:28 -07:00 · 2025-08-23 06:34:37 -07:00
283 changed files with 9815 additions and 3767 deletions
--- a/.github/workflows/coverage.yaml
+++ b/.github/workflows/coverage.yaml
@@ -25,7 +25,7 @@ jobs:
          version: "latest"

      - name: Set up Python
-        run: uv python install 3.10
+        run: uv python install 3.12

      - name: Install system packages
        run: |
--- a/.github/workflows/python-compatibility.yaml
+++ b/.github/workflows/python-compatibility.yaml
@@ -0,0 +1,61 @@
+name: Python Compatibility Test
+
+on:
+  push:
+    branches: [main, develop]
+    paths: ['pyproject.toml']
+  pull_request:
+    branches: [main, develop]
+    paths: ['pyproject.toml']
+
+jobs:
+  test-compatibility:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ['3.10.18', '3.11.13', '3.12.11', '3.13.5']
+
+    name: Python ${{ matrix.python-version }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install system dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y \
+            portaudio19-dev \
+            libcairo2-dev \
+            libgirepository1.0-dev \
+            pkg-config
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: 'latest'
+
+      - name: Set up Python ${{ matrix.python-version }}
+        run: |
+          uv python install ${{ matrix.python-version }}
+          uv python pin ${{ matrix.python-version }}
+
+      - name: Test uv sync with all extras (Python < 3.13)
+        if: "!startsWith(matrix.python-version, '3.13.')"
+        run: |
+          uv sync --group dev --all-extras --no-extra krisp
+
+      - name: Test uv sync without PyTorch extras (Python 3.13+)
+        if: startsWith(matrix.python-version, '3.13.')
+        run: |
+          uv sync --group dev --all-extras \
+            --no-extra krisp \
+            --no-extra ultravox \
+            --no-extra local-smart-turn \
+            --no-extra moondream \
+            --no-extra mlx-whisper
+
+      - name: Verify installation
+        run: |
+          uv run python --version
+          uv run python -c "import pipecat; print('✅ Pipecat imports successfully')"
--- a/.github/workflows/sync-quickstart.yaml
+++ b/.github/workflows/sync-quickstart.yaml
@@ -23,17 +23,12 @@ jobs:
          token: ${{ secrets.QUICKSTART_SYNC_TOKEN }}
          path: quickstart-repo

-      - name: Sync files (excluding READMEs)
+      - name: Sync files (excluding uv.lock and README.md)
        run: |
-          # Copy code files only, skip READMEs
-          cp examples/quickstart/bot.py quickstart-repo/
-          cp examples/quickstart/requirements.txt quickstart-repo/
-          cp examples/quickstart/env.example quickstart-repo/
-
-          # Copy any other files that aren't README.md
+          # Copy all files except uv.lock and README.md
          find examples/quickstart -type f \
            -not -name "README.md" \
-            -not -name "*.md" \
+            -not -name "uv.lock" \
            -exec cp {} quickstart-repo/ \;

      - name: Commit and push changes
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -29,7 +29,7 @@ jobs:
          version: "latest"

      - name: Set up Python
-        run: uv python install 3.10
+        run: uv python install 3.12

      - name: Install system packages
        run: |
--- a/.github/workflows/update-lockfile.yaml
+++ b/.github/workflows/update-lockfile.yaml
@@ -1,42 +0,0 @@
-name: Update lockfile
-
-on:
-  push:
-    paths:
-      - 'pyproject.toml'
-    branches:
-      - main
-  workflow_dispatch: # Allows manual triggering from GitHub UI
-
-jobs:
-  update-lockfile:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          # This gives the workflow permission to push back to the repo
-          token: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v1
-
-      - name: Update lockfile
-        run: uv lock
-
-      - name: Check for changes
-        id: verify-changed-files
-        run: |
-          if [ -n "$(git status --porcelain)" ]; then
-            echo "changed=true" >> $GITHUB_OUTPUT
-          else
-            echo "changed=false" >> $GITHUB_OUTPUT
-          fi
-
-      - name: Commit lockfile
-        if: steps.verify-changed-files.outputs.changed == 'true'
-        run: |
-          git config --local user.email "action@github.com"  
-          git config --local user.name "GitHub Action"
-          git add uv.lock
-          git commit -m "chore: update uv.lock after dependency changes"
-          git push
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,11 +7,244 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## [Unreleased]

+### Fixed
+
+- Fixed a `CartesiaTTSService` issue that was causing the application to hang
+  after Cartesia's 5 minutes timed out.
+
+## [0.0.81] - 2025-08-25
+
 ### Added

+- Added `pipecat.extensions.voicemail`, a module for detecting voicemail vs.
+  live conversation, primarily intended for use in outbound calling scenarios.
+  The voicemail module is optimized for text LLMs only.
+
+- Added new frames to the `idle_timeout_frames` arg: `TranscriptionFrame`,
+  `InterimTranscriptionFrame`, `UserStartedSpeakingFrame`, and
+  `UserStoppedSpeakingFrame`. These additions serve as indicators of user
+  activity in the pipeline idle detection logic.
+
+- Allow passing custom pipeline sink and source processors to a
+  `Pipeline`. Pipeline source and sink processors are used to know and control
+  what's coming in and out of a `Pipeline` processor.
+
+- Added `FrameProcessor.pause_processing_system_frames()` and
+  `FrameProcessor.resume_processing_system_frames()`. These allow to pause and
+  resume the processing of system frame.
+
+- Added new `on_process_frame()` observer method which makes it possible to know
+  when a frame is being processed.
+
+- Added new `FrameProcessor.entry_processor()` method. This allows you to access
+  the first non-compound processor in a pipeline.
+
+- Added `FrameProcessor` properties `processors`, `next` and `previous`.
+
+- `ElevenLabsTTSService` now supports additional runtime changes to the `model`,
+  `language`, and `voice_settings` parameters.
+
+- Added `apply_text_normalization` support to `ElevenLabsTTSService` and
+  `ElevenLabsHttpTTSService`.
+
+- Added `MistralLLMService`, using Mistral's chat completion API.
+
+- Added the ability to retry executing a chat completion after a timeout period
+  for `OpenAILLMService` and its subclasses, `AnthropicLLMService`, and
+  `AWSBedrockLLMService`. The LLM services accept new args:
+  `retry_timeout_secs` and `retry_on_timeout`. This feature is disabled by
+  default.
+
+### Changed
+
+- Updated `daily-python` to 0.19.7.
+
+### Deprecated
+
+- `FrameProcessor.wait_for_task()` is deprecated. Use `await task` or `await
+  asyncio.wait_for(task, timeout)` instead.
+
+### Removed
+
+- Watchdog timers have been removed. They were introduced in 0.0.72 to help
+  diagnose pipeline freezes. Unfortunately, they proved ineffective since they
+  required developers to use Pipecat-specific queues, iterators, and events to
+  correctly reset the timer, which limited their usefulness and added friction.
+
+- Removed unused `FrameProcessor.set_parent()` and
+  `FrameProcessor.get_parent()`.
+
+### Fixed
+
+- Fixed an issue that would cause `PipelineRunner` and `PipelineTask` to not
+  handle external asyncio task cancellation properly.
+
+- Added `SpeechmaticsSTTService` exception handling on connection and sending.
+
+- Replaced `asyncio.wait_for()` for `wait_for2.wait_for()` for Python <
+  3.12. because of issues regarding task cancellation (i.e. cancellation is
+  never propagated).
+  See https://bugs.python.org/issue42130
+
+- Fixed an `AudioBufferProcessor` issues that would cause audio overlap when
+  setting a max buffer size.
+
+- Fixed an issue where `AsyncAITTSService` had very high latency in responding
+  by adding `force=true` when sending the flush command.
+
+### Performance
+
+- Improve `PipelineTask` performance by using direct mode processors and by
+  removing unnecessary tasks.
+
+- Improve `ParallelPipeline` performance by using direct mode, by not
+  creating a task for each frame and every sub-pipeline and also by removing
+  other unnecessary tasks.
+
+- `Pipeline` performance improvements by using direct mode.
+
+### Other
+
+- Added `14w-function-calling-mistal.py` using `MistralLLMService`.
+
+- Added `13j-azure-transcription.py` using `AzureSTTService`.
+
+## [0.0.80] - 2025-08-13
+
+### Added
+
+- Added `GeminiTTSService` which uses Google Gemini to generate TTS output. The
+  Gemini model can be prompted to insert styled speech to control the TTS
+  output.
+
+- Added Exotel support to Pipecat's development runner. You can now connect
+  using the runner with `uv run bot.py -t exotel` and an ngrok connection to
+  HTTP port 7860.
+
+- Added `enable_direct_mode` argument to `FrameProcessor`. The direct mode is
+  for processors which require very little I/O or compute resources, that is
+  processors that can perform their task almost immediately. These type of
+  processors don't need any of the internal tasks and queues usually created by
+  frame processors which means overall application performance might be slightly
+  increased. Use with care.
+
+- Added TTFB metrics for `HeyGenVideoService` and `TavusVideoService`.
+
+- Added `endpoint_id` parameter to `AzureSTTService`. ([Custom EndpointId](https://docs.azure.cn/en-us/ai-services/speech-service/how-to-recognize-speech?pivots=programming-language-python#use-a-custom-endpoint))
+
+### Changed
+
+- `WatchdogPriorityQueue` now requires the items to be inserted to always be
+  tuples and the size of the tuple needs to be specified in the constructor when
+  creating the queue with the `tuple_size` argument.
+
+- Updated Moondream to revision `2025-01-09`.
+
+- Updated `PlayHTHttpTTSService` to no longer use the `pyht` client to remove
+  compatibility issues with other packages. Now you can use the PlayHT HTTP
+  service with other services, like GoogleLLMService.
+
+- Updated `pyproject.toml` to once again pin `numba` to `>=0.61.2` in order to
+  resolve package versioning issues.
+
+- Updated the `STTMuteFilter` to include `VADUserStartedSpeakingFrame` and
+  `VADUserStoppedSpeakingFrame` in the list of frames to filter when the
+  filtering is on.
+
+### Performance
+
+- Improving the latency of the `HeyGenVideoService`.
+
+- Improved some frame processors performance by using the new frame processor
+  direct mode. In direct mode a frame processor will process frames right away
+  avoiding the need for internal queues and tasks. This is useful for some
+  simple processors. For example, in processors that wrap other processors
+  (e.g. `Pipeline`, `ParallelPipeline`), we add one processor before and one
+  after the wrapped processors (internally, you will see them as sources and
+  sinks). These sources and sinks don't do any special processing and they
+  basically forward frames. So, for these simple processors we now enable the
+  new direct mode which avoids creating any internal tasks (and queues) and
+  therefore improves performance.
+
+### Fixed
+
+- Fixed an issue with the `BaseWhisperSTTService` where the language was
+  specified as an enum and not a string.
+
+- Fixed an issue where `SmallWebRTCTransport` ended before TTS finished.
+
+- Fixed an issue in `OpenAIRealtimeBetaLLMService` where specifying a `text`
+  `modalities` didn't result in text being outputted from the model.
+
+- Added SSML reserved character escaping to `AzureBaseTTSService` to properly
+  handle special characters in text sent to Azure TTS. This fixes an issue
+  where characters like `&`, `<`, `>`, `"`, and `'` in LLM-generated text would
+  cause TTS failures.
+
+- Fixed a `WatchdogPriorityQueue` issue that could cause an exception when
+  compating watchdog cancel sentinel items with other items in the queue.
+
+- Fixed an issue that would cause system frames to not be processed with higher
+  priority than other frames. This could cause slower interruption times.
+
+- Fixed an issue where retrying a websocket connection error would result in an
+  error.
+
+### Other
+
+- Add foundation example `19b-openai-realtime-beta-text.py`, showing how to use
+  `OpenAIRealtimeBetaLLMService` to output text to a TTS service.
+
+- Add vision support to release evals so we can run the foundational examples 12
+  series.
+
+- Added foundational example `15a-switch-languages.py` to release evals. It is
+  able to detect if we switched the language properly.
+
+- Updated foundational examples to show how to enclose complex logic
+  (e.g. `ParallelPipeline`) into a single processor so the main pipeline becomes
+  simpler.
+
+- Added `07n-interruptible-gemini.py`, demonstrating how to use
+  `GeminiTTSService`.
+
+## [0.0.79] - 2025-08-07
+
+### Changed
+
+- Changed `pipecat-ai`'s `openai` dependency to `>=1.74.0,<=1.99.1` due to a
+  breaking change in `openai` 1.99.2 ([commit](https://github.com/openai/openai-python/commit/657f551dbe583ffb259d987dafae12c6211fba06))
+
+### Deprecated
+
+- `TTSService.say()` is deprecated, push a `TTSSpeakFrame` instead. Calling
+  functions directly is a discouraged pattern in Pipecat because, for example,
+  it might cause issues with frame ordering.
+
+- `LLMMessagesFrame` is deprecated, in favor of either:
+
+  - `LLMMessagesUpdateFrame` with `run_llm=True`
+  - `OpenAILLMContextFrame` with desired messages in a new context
+
+- `LLMUserResponseAggregator` and `LLMAssistantResponseAggregator` are
+  deprecated, as they depended on the now-deprecated `LLMMessagesFrame`. Use
+  `LLMUserContextAggregator` and `LLMAssistantResponseAggregator` (or
+  LLM-specific subclasses thereof) instead.
+
+## [0.0.78] - 2025-08-07
+
+### Added
+
+- Added `enable_emulated_vad_interruptions` to `LLMUserAggregatorParams`.
+  When user speech is emulated (e.g. when a transcription is received but
+  VAD doesn't detect speech), this parameter controls whether the emulated
+  speech can interrupt the bot. Default is False (emulated speech is ignored
+  while the bot is speaking).
+
 - Added new `handle_sigint` and `handle_sigterm` to `RunnerArguments`. This
  allows applications to know what settings they should use for the environment
-  they are running on.
+  they are running on. Also, added `pipeline_idle_timeout_secs` to be able to
+  control the `PipelineTask` idle timeout.

 - Added `processor` field to `ErrorFrame` to indicate `FrameProcessor` that
  generated the error.
@@ -46,6 +279,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Added Chinese, Japanese, Korean word timestamp support to
  `CartesiaTTSService`.

+- Added `region` parameter to `GladiaSTTService`. Accepted values: eu-west
+  (default), us-west.
+
 ### Changed

 - System frames are now queued. Before, system frames could be generated from
@@ -77,8 +313,30 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - The development runner now strips any provided protocol (e.g. https://) from
  the proxy address and issues a warning. It also strips trailing `/`.

+### Deprecated
+
+- In the `pipecat.runner.daily`, the `configure_with_args()` function is
+  deprecated. Use the `configure()` function instead.
+
+- The development runner's `/connect` endpoint is deprecated and will be
+  removed in a future version. Use the `/start` endpoint in its place. In the
+  meantime, both endpoints work and deliver equivalent functionality.
+
 ### Fixed

+- Fixed a `DailyTransport` issue that would result in an unhandled
+  `concurrent.futures.CancelledError` when a future is cancelled.
+
+- Fixed a `RivaSTTService` issue that would result in an unhandled
+  `concurrent.futures.CancelledError` when a future is cancelled when reading
+  from the audio chunks from the incoming audio stream.
+
+- Fixed an issue in the `BaseOutputTransport`, mainly reproducible with
+  `FastAPIWebsocketOutputTransport` when the audio mixer was enabled, where the
+  loop could consume 100% CPU by continuously returning without delay, preventing
+  other asyncio tasks (such as cancellation or shutdown signals) from being
+  processed.
+
 - Fixed an issue where `BotStartedSpeakingFrame` and `BotStoppedSpeakingFrame`
  were not emitted when using `TavusVideoService` or `HeyGenVideoService`.

@@ -98,14 +356,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Fixed an issue in `TaskObserver` (a proxy to all observers) that was degrading
  global performance.

-### Deprecated
+### Other

- In the `pipecat.runner.daily`, the `configure_with_args()` function is
-  deprecated. Use the `configure()` function instead.
-
- The development runner's `/connect` endpoint is deprecated and will be
-  removed in a future version. Use the `/start` endpoint in its place. In the
-  meantime, both endpoints work and deliver equivalent functionality.
+- Added `07aa-interruptible-soniox.py`, `07ab-interruptible-inworld-http.py`,
+  `07ac-interruptible-asyncai.py` and `07ac-interruptible-asyncai-http.py`
+  release evals.

 ## [0.0.77] - 2025-07-31

--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -31,6 +31,23 @@ git push origin your-branch-name

 Our maintainers will review your PR, and once everything is good, your contributions will be merged!

+## Dependency Management
+
+This project uses [uv](https://docs.astral.sh/uv/) for dependency management. The `uv.lock` file is committed to ensure reproducible builds.
+
+### Adding or Updating Dependencies
+
+1. Edit `pyproject.toml` to add/update dependencies
+2. Run `uv lock` to update the lockfile with new dependency resolution
+3. Run `uv sync` to install the updated dependencies locally
+4. Always commit both files together:
+   ```bash
+   git add pyproject.toml uv.lock
+   git commit -m "feat: add new dependency for feature X"
+   ```
+
+**Important:** Never manually edit `uv.lock`. It's auto-generated by `uv lock`.
+
 ## Code Style and Documentation

 ### Python Code Style
--- a/README.md
+++ b/README.md
@@ -54,7 +54,7 @@ You can connect to Pipecat from any platform using our official SDKs:
 | Category            | Services                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              |
 | ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | Speech-to-Text      | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper)                                                                                                                                                                                          |
-| LLMs                | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova) [Together AI](https://docs.pipecat.ai/server/services/llm/together)                                                                                                                                                          |
+| LLMs                | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova) [Together AI](https://docs.pipecat.ai/server/services/llm/together)                                                                                          |
 | Text-to-Speech      | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
 | Speech-to-Speech    | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
 | Transport           | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |
@@ -112,6 +112,13 @@ You can get started with Pipecat running on your local machine, then move your a

 ## 🛠️ Contributing to the framework

+### Prerequisites
+
+**Minimum Python Version:** 3.10
+**Recommended Python Version:** 3.12
+
+### Setup Steps
+
 1. Clone the repository and navigate to it:

   ```bash
@@ -122,7 +129,7 @@ You can get started with Pipecat running on your local machine, then move your a
 2. Install development and testing dependencies:

   ```bash
-   uv sync --group dev --all-extras --no-extra krisp
+   uv sync --group dev --all-extras --no-extra gstreamer --no-extra krisp --no-extra local
   ```

 3. Install the git pre-commit hooks:
@@ -131,6 +138,25 @@ You can get started with Pipecat running on your local machine, then move your a
   uv run pre-commit install
   ```

+### Python 3.13+ Compatibility
+
+Some features require PyTorch, which doesn't yet support Python 3.13+. Install using:
+
+```bash
+uv sync --group dev --all-extras \
+  --no-extra gstreamer \
+  --no-extra krisp \
+  --no-extra local \
+  --no-extra local-smart-turn \
+  --no-extra mlx-whisper \
+  --no-extra moondream \
+  --no-extra ultravox
+```
+
+> **Tip:** For full compatibility, use Python 3.12: `uv python pin 3.12`
+
+> **Note**: Some extras (local, gstreamer) require system dependencies. See documentation if you encounter build errors.
+
 ### Running tests

 To run all tests, from the root directory:
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,10 +0,0 @@
-# Pipecat Docs
-
-## [Architecture Overview](architecture.md)
-
-Learn about the thinking behind the framework's design.
-
-## [A Frame's Progress](frame-progress.md)
-
-See how a Frame is processed through a Transport, a Pipeline, and a series of Frame Processors.
-
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -1,17 +0,0 @@
-# Pipecat architecture guide
-
-## Frames
-
-Frames can represent discrete chunks of data, for instance a chunk of text, a chunk of audio, or an image. They can also be used to as control flow, for instance a frame that indicates that there is no more data available, or that a user started or stopped talking. They can also represent more complex data structures, such as a message array used for an LLM completion.
-
-## FrameProcessors
-
-Frame processors operate on frames. Every frame processor implements a `process_frame` method that consumes one frame and produces zero or more frames. Frame processors can do simple transforms, such as concatenating text fragments into sentences, or they can treat frames as input for an AI Service, and emit chat completions based on message arrays or transform text into audio or images.
-
-## Pipelines
-
-Pipelines are lists of frame processors linked together. Frame processors can push frames upstream or downstream to their peers. A very simple pipeline might chain an LLM frame processor to a text-to-speech frame processor, with a transport as an output.
-
-## Transports
-
-Transports provide input and output frame processors to receive or send frames respectively. For example, the `DailyTransport` does this with a WebRTC session joined to a Daily.co room.
--- a/docs/frame-progress.md
+++ b/docs/frame-progress.md
@@ -1,46 +0,0 @@
-# A Frame's Progress
-
-1. A user says “Hello, LLM” and the cloud transcription service delivers a transcription to the Transport.
-![A transcript frame arrives](images/frame-progress-01.png)
-
-2. The Transport places a Transcription frame in the Pipeline’s source queue.
-![Frame in source queue](images/frame-progress-02.png)
-
-3. The Pipeline passes the Transcription frame to the first Frame Processor in its list, the LLM User Message Aggregator.
-![To UMA](images/frame-progress-03.png)
-
-4. The LLM User Message Aggregator updates the LLM Context with a `{“user”: “Hello LLM”}` message.
-![Update context](images/frame-progress-04.png)
-
-5. The LLM User Message Aggregator yields an LLM Message Frame, containing the updated LLM Context. The Pipeline passes this frame to the LLM Frame Processor.
-![Update context](images/frame-progress-05.png)
-
-6. The LLM Frame Processor creates a streaming chat completion based on the LLM context and yields the first chunk of a response, Text Frame with the value “Hi, “. The Pipeline passes this frame to the TTS Frame Processor. The TTS Frame Processor aggregates this response but doesn’t yield anything, yet, because it’s waiting for a full sentence.
-![LLM yields Text](images/frame-progress-06.png)
-
-7. The LLM Frame Processor yields another Text Frame with the value “there.”. The Pipeline passes this frame to the TTS Frame Processor.
-![LLM yields more Text](images/frame-progress-07.png)
-
-8. The TTS Frame Processor now has a full sentence, so it starts streaming audio based on “Hi, there.” It yields the first chunk of streaming audio as an Audio frame, which the Pipeline passes to the LLM Assistant Message Aggregator.
-![TTS yields Audio](images/frame-progress-08.png)
-
-9. The LLM Assistant Message Aggregator doesn’t do anything with Audio frames, so it immediately yields the frame, unchanged. This is the convention for all Frame Processors: frames that the processor doesn’t process should be immediately yielded.
-![pass-through](images/frame-progress-09.png)
-
-10. The Pipeline places the first Audio frame in its sink queue, which is being watched by the Transport. Since the frame is now in a queue, the Pipeline can continue processing other frames. Note that the source and sink queues form a sort of “boundary of concurrent processing” between a Pipeline and the outside world. In a Pipeline, Frames are processed sequentially; once a Frame is on a queue it can be processed in parallel with the frames being processed by the Pipeline. TODO: link to a more in-depth section about this.
-![sink queue](images/frame-progress-10.png)
-
-11. The TTS Frame Processor yields another Audio frame as the Transport transmits the first Audio frame.
-![parallel audio](images/frame-progress-11.png)
-
-12. As before, the LLM Assistant Message Aggregator immediately yields the Audio frame and the Pipeline places the Audio frame in the sink queue.
-![sink queue 2](images/frame-progress-12.png)
-
-13. The TTS Frame Processor has no more frames to yield. The LLM Frame Processor emits an LLM Response End Frame, which the Pipeline passes to the TTS Frame Processor.
-![response end](images/frame-progress-13.png)
-
-14. The TTS Frame Processor immediately yields the LLM Response End Frame, so the Pipeline passes it along to the LLM Assistant Message Aggregator. The LLM Assistant Message Aggregator updates the LLM Context with the full response from the LLM. TODO TODO: I realized I forgot that the TSS Frame Processor also yields the Text frames that the LLM emitted so that the LLM Assistant Message Aggregator could accumulate them, arrggh.
-![response end](images/frame-progress-14.png)
-
-15. The system is quiet, and waiting for the next message from the Transport.
-![response end](images/frame-progress-15.png)
--- a/docs/frame.md
+++ b/docs/frame.md
@@ -1,110 +0,0 @@
-# Understanding Different Frame Types in the Pipecat System
-
-In the Pipecat system, frames are used to represent different types of data and control signals that flow through the pipeline. Understanding these frame types is crucial for working with the system effectively. This tutorial will cover the main categories of frames and their specific uses.
-
-## 1. Base Frame Classes
-
-### Frame
-The `Frame` class is the base class for all frames. It includes:
- `id`: A unique identifier
- `name`: A descriptive name
- `pts`: Presentation timestamp (optional)
-
-### DataFrame
-`DataFrame` is a subclass of `Frame` and serves as a base for most data-carrying frames.
-
-## 2. Audio Frames
-
-### AudioRawFrame
-Represents a chunk of audio with properties:
- `audio`: Raw audio data
- `sample_rate`: Audio sample rate
- `num_channels`: Number of audio channels
-
-Subclasses include:
- `InputAudioRawFrame`: For audio from input sources
- `OutputAudioRawFrame`: For audio to be played by output devices
- `TTSAudioRawFrame`: For audio generated by Text-to-Speech services
-
-## 3. Image Frames
-
-### ImageRawFrame
-Represents an image with properties:
- `image`: Raw image data
- `size`: Image dimensions
- `format`: Image format (e.g., JPEG, PNG)
-
-Subclasses include:
- `InputImageRawFrame`: For images from input sources
- `OutputImageRawFrame`: For images to be displayed
- `UserImageRawFrame`: For images associated with a specific user
- `VisionImageRawFrame`: For images with associated text for description
- `URLImageRawFrame`: For images with an associated URL
-
-### SpriteFrame
-Represents an animated sprite, containing a list of `ImageRawFrame` objects.
-
-## 4. Text and Transcription Frames
-
-### TextFrame
-Represents a chunk of text, used for various purposes in the pipeline.
-
-### TranscriptionFrame
-A specialized `TextFrame` for speech transcriptions, including:
- `user_id`: ID of the speaking user
- `timestamp`: When the transcription was generated
- `language`: Detected language of the speech
-
-### InterimTranscriptionFrame
-Similar to `TranscriptionFrame`, but for interim (not final) transcriptions.
-
-## 5. LLM (Language Model) Frames
-
-### LLMMessagesFrame
-Contains a list of messages for an LLM service to process.
-
-### LLMMessagesAppendFrame and LLMMessagesUpdateFrame
-Used to modify the current context of LLM messages.
-
-### LLMSetToolsFrame
-Specifies tools (functions) available for the LLM to use.
-
-### LLMEnablePromptCachingFrame
-Controls prompt caching in certain LLMs.
-
-## 6. System and Control Frames
-
-### SystemFrame
-Base class for system-level frames.
-
-Important system frames include:
- `StartFrame`: Initiates a pipeline
- `CancelFrame`: Stops a pipeline immediately
- `ErrorFrame`: Notifies of errors (with `FatalErrorFrame` for unrecoverable errors)
- `EndTaskFrame` and `CancelTaskFrame`: Control pipeline tasks
- `StartInterruptionFrame` and `StopInterruptionFrame`: Indicate user speech for interruptions
-
-### ControlFrame
-Base class for control-flow frames.
-
-Notable control frames:
- `EndFrame`: Signals the end of a pipeline
- `LLMFullResponseStartFrame` and `LLMFullResponseEndFrame`: Bracket LLM responses
- `UserStartedSpeakingFrame` and `UserStoppedSpeakingFrame`: Indicate user speech activity
- `BotStartedSpeakingFrame` and `BotStoppedSpeakingFrame`: Indicate bot speech activity
- `TTSStartedFrame` and `TTSStoppedFrame`: Bracket Text-to-Speech responses
-
-## 7. Special Purpose Frames
-
-### MetricsFrame
-Contains performance metrics data.
-
-### FunctionCallInProgressFrame and FunctionCallResultFrame
-Used for handling LLM function (tool) calls.
-
-### ServiceUpdateSettingsFrame
-Base class for updating service settings, with specific subclasses for LLM, TTS, and STT services.
-
-## Conclusion
-
-Understanding these frame types is essential for working with the Pipecat system. Each frame type serves a specific purpose in the pipeline, whether it's carrying data (like audio or images), controlling the flow of the pipeline, or managing system-level operations. By using the appropriate frame types, you can effectively process and transmit various kinds of information through your pipeline.
--- a/docs/images/frame-progress-01.png
+++ b/docs/images/frame-progress-01.png
--- a/docs/images/frame-progress-02.png
+++ b/docs/images/frame-progress-02.png
--- a/docs/images/frame-progress-03.png
+++ b/docs/images/frame-progress-03.png
--- a/docs/images/frame-progress-04.png
+++ b/docs/images/frame-progress-04.png
--- a/docs/images/frame-progress-05.png
+++ b/docs/images/frame-progress-05.png
--- a/docs/images/frame-progress-06.png
+++ b/docs/images/frame-progress-06.png
--- a/docs/images/frame-progress-07.png
+++ b/docs/images/frame-progress-07.png
--- a/docs/images/frame-progress-08.png
+++ b/docs/images/frame-progress-08.png
--- a/docs/images/frame-progress-09.png
+++ b/docs/images/frame-progress-09.png
--- a/docs/images/frame-progress-10.png
+++ b/docs/images/frame-progress-10.png
--- a/docs/images/frame-progress-11.png
+++ b/docs/images/frame-progress-11.png
--- a/docs/images/frame-progress-12.png
+++ b/docs/images/frame-progress-12.png
--- a/docs/images/frame-progress-13.png
+++ b/docs/images/frame-progress-13.png
--- a/docs/images/frame-progress-14.png
+++ b/docs/images/frame-progress-14.png
--- a/docs/images/frame-progress-15.png
+++ b/docs/images/frame-progress-15.png
--- a/env.example
+++ b/env.example
@@ -29,6 +29,9 @@ CARTESIA_API_KEY=...
 DAILY_API_KEY=...
 DAILY_SAMPLE_ROOM_URL=https://...

+# Deepgram
+DEEPGRAM_API_KEY=...
+
 # ElevenLabs
 ELEVENLABS_API_KEY=...
 ELEVENLABS_VOICE_ID=...
@@ -44,6 +47,7 @@ FIREWORKS_API_KEY=...

 # Gladia
 GLADIA_API_KEY=...
+GLADIA_REGION=...

 # Google
 GOOGLE_API_KEY=...
@@ -135,4 +139,4 @@ SAMBANOVA_API_KEY=...
 SENTRY_DSN=...

 # Heygen
-HEYGEN_API_KEY=...
+HEYGEN_API_KEY=...
--- a/examples/foundational/01-say-one-thing-piper.py
+++ b/examples/foundational/01-say-one-thing-piper.py
@@ -43,7 +43,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            base_url=os.getenv("PIPER_BASE_URL"), aiohttp_session=session, sample_rate=24000
        )

-        task = PipelineTask(Pipeline([tts, transport.output()]))
+        task = PipelineTask(
+            Pipeline([tts, transport.output()]),
+            idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+        )

        # Register an event handler so we can play the audio when the client joins
        @transport.event_handler("on_client_connected")
--- a/examples/foundational/01-say-one-thing-rime.py
+++ b/examples/foundational/01-say-one-thing-rime.py
@@ -44,7 +44,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            aiohttp_session=session,
        )

-        task = PipelineTask(Pipeline([tts, transport.output()]))
+        task = PipelineTask(
+            Pipeline([tts, transport.output()]),
+            idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+        )

        # Register an event handler so we can play the audio when the client joins
        @transport.event_handler("on_client_connected")
--- a/examples/foundational/01-say-one-thing.py
+++ b/examples/foundational/01-say-one-thing.py
@@ -41,7 +41,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )

-    task = PipelineTask(Pipeline([tts, transport.output()]))
+    task = PipelineTask(
+        Pipeline([tts, transport.output()]),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )

    # Register an event handler so we can play the audio when the client joins
    @transport.event_handler("on_client_connected")
--- a/examples/foundational/01c-fastpitch.py
+++ b/examples/foundational/01c-fastpitch.py
@@ -38,7 +38,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    tts = FastPitchTTSService(api_key=os.getenv("NVIDIA_API_KEY"))

-    task = PipelineTask(Pipeline([tts, transport.output()]))
+    task = PipelineTask(
+        Pipeline([tts, transport.output()]),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )

    # Register an event handler so we can play the audio when the client joins
    @transport.event_handler("on_client_connected")
--- a/examples/foundational/02-llm-say-one-thing.py
+++ b/examples/foundational/02-llm-say-one-thing.py
@@ -9,10 +9,14 @@ import os
 from dotenv import load_dotenv
 from loguru import logger

-from pipecat.frames.frames import EndFrame, LLMMessagesFrame
+from pipecat.frames.frames import EndFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineTask
+from pipecat.processors.aggregators.openai_llm_context import (
+    OpenAILLMContext,
+    OpenAILLMContextFrame,
+)
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
 from pipecat.services.cartesia.tts import CartesiaTTSService
@@ -51,12 +55,15 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        }
    ]

-    task = PipelineTask(Pipeline([llm, tts, transport.output()]))
+    task = PipelineTask(
+        Pipeline([llm, tts, transport.output()]),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )

    # Register an event handler so we can play the audio when the client joins
    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
-        await task.queue_frames([LLMMessagesFrame(messages), EndFrame()])
+        await task.queue_frames([OpenAILLMContextFrame(OpenAILLMContext(messages)), EndFrame()])

    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

--- a/examples/foundational/03-still-frame.py
+++ b/examples/foundational/03-still-frame.py
@@ -51,7 +51,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            key=os.getenv("FAL_KEY"),
        )

-        task = PipelineTask(Pipeline([imagegen, transport.output()]))
+        task = PipelineTask(
+            Pipeline([imagegen, transport.output()]),
+            idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+        )

        # Register an event handler so we can play the audio when the client joins
        @transport.event_handler("on_client_connected")
--- a/examples/foundational/03b-still-frame-imagen.py
+++ b/examples/foundational/03b-still-frame-imagen.py
@@ -52,6 +52,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    # Register an event handler so we can play the audio when the client joins
--- a/examples/foundational/04-transports-small-webrtc.py
+++ b/examples/foundational/04-transports-small-webrtc.py
@@ -110,7 +110,7 @@ async def run_example(webrtc_connection: SmallWebRTCConnection):
        logger.info(f"Client disconnected")
        await task.cancel()

-    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+    runner = PipelineRunner(handle_sigint=False)

    await runner.run(task)

--- a/examples/foundational/05-sync-speech-and-image.py
+++ b/examples/foundational/05-sync-speech-and-image.py
@@ -15,13 +15,16 @@ from pipecat.frames.frames import (
    DataFrame,
    Frame,
    LLMFullResponseStartFrame,
-    LLMMessagesFrame,
    TextFrame,
 )
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
 from pipecat.pipeline.task import PipelineTask
+from pipecat.processors.aggregators.openai_llm_context import (
+    OpenAILLMContext,
+    OpenAILLMContextFrame,
+)
 from pipecat.processors.aggregators.sentence import SentenceAggregator
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 from pipecat.runner.types import RunnerArguments
@@ -153,9 +156,12 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
                }
            ]
            frames.append(MonthFrame(month=month))
-            frames.append(LLMMessagesFrame(messages))
+            frames.append(OpenAILLMContextFrame(OpenAILLMContext(messages)))

-        task = PipelineTask(pipeline)
+        task = PipelineTask(
+            pipeline,
+            idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+        )

        # Set up transport event handlers
        @transport.event_handler("on_client_connected")
--- a/examples/foundational/05a-local-sync-speech-and-image.py
+++ b/examples/foundational/05a-local-sync-speech-and-image.py
@@ -15,7 +15,6 @@ from loguru import logger

 from pipecat.frames.frames import (
    Frame,
-    LLMMessagesFrame,
    OutputAudioRawFrame,
    TextFrame,
    TTSAudioRawFrame,
@@ -25,6 +24,10 @@ from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
 from pipecat.pipeline.task import PipelineTask
+from pipecat.processors.aggregators.openai_llm_context import (
+    OpenAILLMContext,
+    OpenAILLMContextFrame,
+)
 from pipecat.processors.aggregators.sentence import SentenceAggregator
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 from pipecat.services.cartesia.tts import CartesiaHttpTTSService
@@ -137,7 +140,7 @@ async def main():
            )

            task = PipelineTask(pipeline)
-            await task.queue_frame(LLMMessagesFrame(messages))
+            await task.queue_frame(OpenAILLMContextFrame(OpenAILLMContext(messages)))
            await task.stop_when_done()

            await runner.run(task)
--- a/examples/foundational/06-listen-and-respond.py
+++ b/examples/foundational/06-listen-and-respond.py
@@ -119,6 +119,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/06a-image-sync.py
+++ b/examples/foundational/06a-image-sync.py
@@ -137,6 +137,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07-interruptible-cartesia-http.py
+++ b/examples/foundational/07-interruptible-cartesia-http.py
@@ -88,6 +88,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07-interruptible.py
+++ b/examples/foundational/07-interruptible.py
@@ -87,6 +87,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07a-interruptible-speechmatics-vad.py
+++ b/examples/foundational/07a-interruptible-speechmatics-vad.py
@@ -146,6 +146,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07a-interruptible-speechmatics.py
+++ b/examples/foundational/07a-interruptible-speechmatics.py
@@ -129,6 +129,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07aa-interruptible-soniox.py
+++ b/examples/foundational/07aa-interruptible-soniox.py
@@ -86,6 +86,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07ab-interruptible-inworld-http.py
+++ b/examples/foundational/07ab-interruptible-inworld-http.py
@@ -101,6 +101,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
                enable_metrics=True,
                enable_usage_metrics=True,
            ),
+            idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
        )

        @transport.event_handler("on_client_connected")
--- a/examples/foundational/07ac-interruptible-asyncai-http.py
+++ b/examples/foundational/07ac-interruptible-asyncai-http.py
@@ -93,6 +93,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
                enable_metrics=True,
                enable_usage_metrics=True,
            ),
+            idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
        )

        @transport.event_handler("on_client_connected")
--- a/examples/foundational/07ac-interruptible-asyncai.py
+++ b/examples/foundational/07ac-interruptible-asyncai.py
@@ -89,6 +89,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07b-interruptible-langchain.py
+++ b/examples/foundational/07b-interruptible-langchain.py
@@ -16,13 +16,16 @@ from langchain_openai import ChatOpenAI
 from loguru import logger

 from pipecat.audio.vad.silero import SileroVADAnalyzer
-from pipecat.frames.frames import LLMMessagesFrame
+from pipecat.frames.frames import LLMMessagesUpdateFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_response import (
-    LLMAssistantResponseAggregator,
-    LLMUserResponseAggregator,
+    LLMAssistantContextAggregator,
+    LLMUserContextAggregator,
+)
+from pipecat.processors.aggregators.openai_llm_context import (
+    OpenAILLMContext,
 )
 from pipecat.processors.frameworks.langchain import LangchainProcessor
 from pipecat.runner.types import RunnerArguments
@@ -97,8 +100,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    )
    lc = LangchainProcessor(history_chain)

-    tma_in = LLMUserResponseAggregator()
-    tma_out = LLMAssistantResponseAggregator()
+    context = OpenAILLMContext()
+    tma_in = LLMUserContextAggregator(context=context)
+    tma_out = LLMAssistantContextAggregator(context=context)

    pipeline = Pipeline(
        [
@@ -118,17 +122,18 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info(f"Client connected")
        # Kick off the conversation.
-        # the `LLMMessagesFrame` will be picked up by the LangchainProcessor using
+        # An `OpenAILLMContextFrame` will be picked up by the LangchainProcessor using
        # only the content of the last message to inject it in the prompt defined
        # above. So no role is required here.
        messages = [({"content": "Please briefly introduce yourself to the user."})]
-        await task.queue_frames([LLMMessagesFrame(messages)])
+        await task.queue_frames([LLMMessagesUpdateFrame(messages, run_llm=True)])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
--- a/examples/foundational/07c-interruptible-deepgram-vad.py
+++ b/examples/foundational/07c-interruptible-deepgram-vad.py
@@ -92,6 +92,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @stt.event_handler("on_speech_started")
--- a/examples/foundational/07c-interruptible-deepgram.py
+++ b/examples/foundational/07c-interruptible-deepgram.py
@@ -86,6 +86,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07d-interruptible-elevenlabs-http.py
+++ b/examples/foundational/07d-interruptible-elevenlabs-http.py
@@ -93,6 +93,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
                enable_metrics=True,
                enable_usage_metrics=True,
            ),
+            idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
        )

        @transport.event_handler("on_client_connected")
--- a/examples/foundational/07d-interruptible-elevenlabs.py
+++ b/examples/foundational/07d-interruptible-elevenlabs.py
@@ -89,6 +89,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07e-interruptible-playht-http.py
+++ b/examples/foundational/07e-interruptible-playht-http.py
@@ -89,6 +89,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07e-interruptible-playht.py
+++ b/examples/foundational/07e-interruptible-playht.py
@@ -91,6 +91,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07f-interruptible-azure.py
+++ b/examples/foundational/07f-interruptible-azure.py
@@ -95,6 +95,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07g-interruptible-openai.py
+++ b/examples/foundational/07g-interruptible-openai.py
@@ -90,6 +90,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07h-interruptible-openpipe.py
+++ b/examples/foundational/07h-interruptible-openpipe.py
@@ -94,6 +94,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07i-interruptible-xtts.py
+++ b/examples/foundational/07i-interruptible-xtts.py
@@ -92,6 +92,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
                enable_metrics=True,
                enable_usage_metrics=True,
            ),
+            idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
        )

        @transport.event_handler("on_client_connected")
--- a/examples/foundational/07j-interruptible-gladia.py
+++ b/examples/foundational/07j-interruptible-gladia.py
@@ -55,6 +55,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    stt = GladiaSTTService(
        api_key=os.getenv("GLADIA_API_KEY", ""),
+        region=os.getenv("GLADIA_REGION"),
        params=GladiaInputParams(
            language_config=LanguageConfig(
                languages=[Language.EN],
@@ -97,6 +98,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07k-interruptible-lmnt.py
+++ b/examples/foundational/07k-interruptible-lmnt.py
@@ -85,6 +85,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07l-interruptible-groq.py
+++ b/examples/foundational/07l-interruptible-groq.py
@@ -90,6 +90,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07m-interruptible-aws.py
+++ b/examples/foundational/07m-interruptible-aws.py
@@ -91,6 +91,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07n-interruptible-gemini.py
+++ b/examples/foundational/07n-interruptible-gemini.py
@@ -0,0 +1,163 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+"""
+A conversational AI bot using Gemini for both LLM and TTS.
+
+This example demonstrates how to use Gemini's TTS capabilities with the new
+GeminiTTSService, which uses Gemini's TTS-specific models instead of Google Cloud TTS.
+
+Features showcased:
+- Gemini LLM for conversation
+- Gemini TTS with natural voice control
+- Support for different voice personalities
+- Style and tone control through natural language prompts
+
+Run with:
+    python examples/foundational/gemini-tts.py
+
+Make sure to set your environment variables:
+    export GOOGLE_API_KEY=your_api_key_here
+"""
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.google.llm import GoogleLLMService
+from pipecat.services.google.stt import GoogleSTTService
+from pipecat.services.google.tts import GeminiTTSService
+from pipecat.transcriptions.language import Language
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
+from pipecat.transports.services.daily import DailyParams
+
+load_dotenv(override=True)
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(),
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot with Gemini TTS")
+
+    stt = GoogleSTTService(
+        params=GoogleSTTService.InputParams(languages=Language.EN_US),
+        credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"),
+    )
+
+    tts = GeminiTTSService(
+        api_key=os.getenv("GOOGLE_API_KEY"),
+        model="gemini-2.5-flash-preview-tts",  # TTS-specific model
+        voice_id="Charon",
+        params=GeminiTTSService.InputParams(language=Language.EN_US),
+    )
+
+    llm = GoogleLLMService(
+        api_key=os.getenv("GOOGLE_API_KEY"),
+        model="gemini-2.5-flash",
+    )
+
+    # System message that instructs the AI on how to speak
+    messages = [
+        {
+            "role": "system",
+            "content": """You are a helpful AI assistant in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way.
+
+            IMPORTANT: Since you're using Gemini TTS which supports natural voice control, you can include speaking instructions in your responses. For example:
+            - "Say cheerfully: Welcome to our conversation!"
+            - "Read this in a calm, professional tone: Here are the details you requested."
+            - "Speak in an excited whisper: I have some great news to share!"
+            - "Say slowly and clearly: Let me explain this step by step."
+
+            Feel free to use natural language instructions to control your voice style, tone, pace, and emotion. The TTS system will interpret these instructions and adjust the speech accordingly.
+
+            Your output will be converted to audio, so avoid special characters in your answers. Respond to what the user said in a creative and helpful way.""",
+        },
+    ]
+
+    context = OpenAILLMContext(messages)
+    context_aggregator = llm.create_context_aggregator(context)
+
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            stt,  # STT
+            context_aggregator.user(),  # User responses
+            llm,  # LLM
+            tts,  # Gemini TTS
+            transport.output(),  # Transport bot output
+            context_aggregator.assistant(),  # Assistant spoken responses
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+        # Kick off the conversation with a styled introduction
+        messages.append(
+            {
+                "role": "system",
+                "content": "Say cheerfully and warmly: Hello! I'm your AI assistant powered by Gemini's new TTS technology. I can speak with different voices, tones, and styles. How can I help you today?",
+            }
+        )
+        await task.queue_frames([context_aggregator.user().get_context_frame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/07n-interruptible-google.py
+++ b/examples/foundational/07n-interruptible-google.py
@@ -98,6 +98,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07o-interruptible-assemblyai.py
+++ b/examples/foundational/07o-interruptible-assemblyai.py
@@ -91,6 +91,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07p-interruptible-krisp.py
+++ b/examples/foundational/07p-interruptible-krisp.py
@@ -89,6 +89,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07q-interruptible-rime-http.py
+++ b/examples/foundational/07q-interruptible-rime-http.py
@@ -94,6 +94,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
                enable_metrics=True,
                enable_usage_metrics=True,
            ),
+            idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
        )

        @transport.event_handler("on_client_connected")
--- a/examples/foundational/07q-interruptible-rime.py
+++ b/examples/foundational/07q-interruptible-rime.py
@@ -88,6 +88,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07r-interruptible-riva-nim.py
+++ b/examples/foundational/07r-interruptible-riva-nim.py
@@ -85,6 +85,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07s-interruptible-google-audio-in.py
+++ b/examples/foundational/07s-interruptible-google-audio-in.py
@@ -266,6 +266,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07t-interruptible-fish.py
+++ b/examples/foundational/07t-interruptible-fish.py
@@ -89,6 +89,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07u-interruptible-ultravox.py
+++ b/examples/foundational/07u-interruptible-ultravox.py
@@ -82,6 +82,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07v-interruptible-neuphonic-http.py
+++ b/examples/foundational/07v-interruptible-neuphonic-http.py
@@ -93,6 +93,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
                enable_metrics=True,
                enable_usage_metrics=True,
            ),
+            idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
        )

        @transport.event_handler("on_client_connected")
--- a/examples/foundational/07v-interruptible-neuphonic.py
+++ b/examples/foundational/07v-interruptible-neuphonic.py
@@ -88,6 +88,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07w-interruptible-fal.py
+++ b/examples/foundational/07w-interruptible-fal.py
@@ -91,6 +91,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/07y-interruptible-minimax.py
+++ b/examples/foundational/07y-interruptible-minimax.py
@@ -95,6 +95,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
                enable_metrics=True,
                enable_usage_metrics=True,
            ),
+            idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
        )

        @transport.event_handler("on_client_connected")
--- a/examples/foundational/07z-interruptible-sarvam.py
+++ b/examples/foundational/07z-interruptible-sarvam.py
@@ -94,6 +94,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
                enable_metrics=True,
                enable_usage_metrics=True,
            ),
+            idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
        )

        @transport.event_handler("on_client_connected")
--- a/examples/foundational/08-bots-arguing.py
+++ b/examples/foundational/08-bots-arguing.py
@@ -6,9 +6,13 @@ from typing import Tuple
 import aiohttp
 from dotenv import load_dotenv

-from pipecat.frames.frames import AudioFrame, EndFrame, ImageFrame, LLMMessagesFrame, TextFrame
+from pipecat.frames.frames import AudioFrame, EndFrame, ImageFrame, TextFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.processors.aggregators import SentenceAggregator
+from pipecat.processors.aggregators.openai_llm_context import (
+    OpenAILLMContext,
+    OpenAILLMContextFrame,
+)
 from pipecat.runner.daily import configure
 from pipecat.services.azure import AzureLLMService, AzureTTSService
 from pipecat.services.elevenlabs import ElevenLabsTTSService
@@ -79,7 +83,7 @@ async def main():
            sentence_aggregator = SentenceAggregator()
            pipeline = Pipeline([llm, sentence_aggregator, tts1], source_queue, sink_queue)

-            await source_queue.put(LLMMessagesFrame(messages))
+            await source_queue.put(OpenAILLMContextFrame(OpenAILLMContext(messages)))
            await source_queue.put(EndFrame())
            await pipeline.run_pipeline()

--- a/examples/foundational/09-mirror.py
+++ b/examples/foundational/09-mirror.py
@@ -80,6 +80,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    task = PipelineTask(
        pipeline,
        params=PipelineParams(),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/09a-local-mirror.py
+++ b/examples/foundational/09a-local-mirror.py
@@ -97,6 +97,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    task = PipelineTask(
        pipeline,
        params=PipelineParams(),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    async def run_tk():
--- a/examples/foundational/10-wake-phrase.py
+++ b/examples/foundational/10-wake-phrase.py
@@ -92,6 +92,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/11-sound-effects.py
+++ b/examples/foundational/11-sound-effects.py
@@ -143,7 +143,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        ]
    )

-    task = PipelineTask(pipeline)
+    task = PipelineTask(
+        pipeline,
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
--- a/examples/foundational/12-describe-video.py
+++ b/examples/foundational/12-describe-video.py
@@ -11,7 +11,7 @@ from dotenv import load_dotenv
 from loguru import logger

 from pipecat.audio.vad.silero import SileroVADAnalyzer
-from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
+from pipecat.frames.frames import Frame, TextFrame, TTSSpeakFrame, UserImageRequestFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineTask
@@ -103,7 +103,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        ]
    )

-    task = PipelineTask(pipeline)
+    task = PipelineTask(
+        pipeline,
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
@@ -116,7 +119,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        image_requester.set_participant_id(client_id)

        # Welcome message
-        await tts.say("Hi there! Feel free to ask me what I see.")
+        await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me what I see."))

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
--- a/examples/foundational/12a-describe-video-gemini-flash.py
+++ b/examples/foundational/12a-describe-video-gemini-flash.py
@@ -11,7 +11,7 @@ from dotenv import load_dotenv
 from loguru import logger

 from pipecat.audio.vad.silero import SileroVADAnalyzer
-from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
+from pipecat.frames.frames import Frame, TextFrame, TTSSpeakFrame, UserImageRequestFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -109,6 +109,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
@@ -122,7 +123,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        image_requester.set_participant_id(client_id)

        # Welcome message
-        await tts.say("Hi there! Feel free to ask me what I see.")
+        await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me what I see."))

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
--- a/examples/foundational/12b-describe-video-gpt-4o.py
+++ b/examples/foundational/12b-describe-video-gpt-4o.py
@@ -11,7 +11,7 @@ from dotenv import load_dotenv
 from loguru import logger

 from pipecat.audio.vad.silero import SileroVADAnalyzer
-from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
+from pipecat.frames.frames import Frame, TextFrame, TTSSpeakFrame, UserImageRequestFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -109,6 +109,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
@@ -122,7 +123,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        image_requester.set_participant_id(client_id)

        # Welcome message
-        await tts.say("Hi there! Feel free to ask me what I see.")
+        await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me what I see."))

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
--- a/examples/foundational/12c-describe-video-anthropic.py
+++ b/examples/foundational/12c-describe-video-anthropic.py
@@ -11,7 +11,7 @@ from dotenv import load_dotenv
 from loguru import logger

 from pipecat.audio.vad.silero import SileroVADAnalyzer
-from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
+from pipecat.frames.frames import Frame, TextFrame, TTSSpeakFrame, UserImageRequestFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -109,6 +109,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
@@ -122,7 +123,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        image_requester.set_participant_id(client_id)

        # Welcome message
-        await tts.say("Hi there! Feel free to ask me what I see.")
+        await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me what I see."))

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
--- a/examples/foundational/13-whisper-transcription.py
+++ b/examples/foundational/13-whisper-transcription.py
@@ -60,7 +60,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    pipeline = Pipeline([transport.input(), stt, tl])

-    task = PipelineTask(pipeline)
+    task = PipelineTask(
+        pipeline,
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
--- a/examples/foundational/13b-deepgram-transcription.py
+++ b/examples/foundational/13b-deepgram-transcription.py
@@ -54,7 +54,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    pipeline = Pipeline([transport.input(), stt, tl])

-    task = PipelineTask(pipeline)
+    task = PipelineTask(
+        pipeline,
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
--- a/examples/foundational/13c-gladia-transcription.py
+++ b/examples/foundational/13c-gladia-transcription.py
@@ -47,6 +47,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    stt = GladiaSTTService(
        api_key=os.getenv("GLADIA_API_KEY"),
+        region=os.getenv("GLADIA_REGION"),
        # live_options=LiveOptions(language=Language.FR),
    )

@@ -54,7 +55,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    pipeline = Pipeline([transport.input(), stt, tl])

-    task = PipelineTask(pipeline)
+    task = PipelineTask(
+        pipeline,
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
--- a/examples/foundational/13c-gladia-translation.py
+++ b/examples/foundational/13c-gladia-translation.py
@@ -56,6 +56,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    stt = GladiaSTTService(
        api_key=os.getenv("GLADIA_API_KEY"),
+        region=os.getenv("GLADIA_REGION"),
        params=GladiaInputParams(
            language_config=LanguageConfig(
                languages=[Language.EN],  # Input in English
@@ -75,7 +76,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    pipeline = Pipeline([transport.input(), stt, tl])

-    task = PipelineTask(pipeline)
+    task = PipelineTask(
+        pipeline,
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
--- a/examples/foundational/13d-assemblyai-transcription.py
+++ b/examples/foundational/13d-assemblyai-transcription.py
@@ -53,7 +53,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    pipeline = Pipeline([transport.input(), stt, tl])

-    task = PipelineTask(pipeline)
+    task = PipelineTask(
+        pipeline,
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
--- a/examples/foundational/13e-whisper-mlx.py
+++ b/examples/foundational/13e-whisper-mlx.py
@@ -87,6 +87,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_disconnected")
--- a/examples/foundational/13f-cartesia-transcription.py
+++ b/examples/foundational/13f-cartesia-transcription.py
@@ -54,7 +54,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    pipeline = Pipeline([transport.input(), stt, tl])

-    task = PipelineTask(pipeline)
+    task = PipelineTask(
+        pipeline,
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
--- a/examples/foundational/13g-sambanova-transcription.py
+++ b/examples/foundational/13g-sambanova-transcription.py
@@ -91,6 +91,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_disconnected")
--- a/examples/foundational/13h-speechmatics-transcription.py
+++ b/examples/foundational/13h-speechmatics-transcription.py
@@ -74,7 +74,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    pipeline = Pipeline([transport.input(), stt, tl])

-    task = PipelineTask(pipeline)
+    task = PipelineTask(
+        pipeline,
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
--- a/examples/foundational/13i-soniox-transcription.py
+++ b/examples/foundational/13i-soniox-transcription.py
@@ -60,7 +60,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    pipeline = Pipeline([transport.input(), stt, tl])

-    task = PipelineTask(pipeline)
+    task = PipelineTask(
+        pipeline,
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
--- a/examples/foundational/13j-azure-transcription.py
+++ b/examples/foundational/13j-azure-transcription.py
@@ -0,0 +1,88 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.frames.frames import Frame, TranscriptionFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineTask
+from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.azure.stt import AzureSTTService
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
+from pipecat.transports.services.daily import DailyParams
+
+load_dotenv(override=True)
+
+
+class TranscriptionLogger(FrameProcessor):
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
+        if isinstance(frame, TranscriptionFrame):
+            print(f"Transcription: {frame.text}")
+
+
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(),
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    stt = AzureSTTService(
+        api_key=os.getenv("AZURE_SPEECH_API_KEY"),
+        region=os.getenv("AZURE_SPEECH_REGION"),
+    )
+
+    tl = TranscriptionLogger()
+
+    pipeline = Pipeline([transport.input(), stt, tl])
+
+    task = PipelineTask(
+        pipeline,
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/14-function-calling.py
+++ b/examples/foundational/14-function-calling.py
@@ -138,6 +138,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/examples/foundational/14a-function-calling-anthropic.py
+++ b/examples/foundational/14a-function-calling-anthropic.py
@@ -132,6 +132,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
--- a/Show More
+++ b/Show More