Merge pull request #3007 from pipecat-ai/aleix/pipecat-0.0.93

update CHANGELOG for 0.0.93
2025-11-07 13:25:25 -08:00 · 2025-11-07 13:17:44 -08:00 · 2025-11-07 11:34:11 -08:00 · 2025-11-07 11:30:41 -08:00 · 2025-11-07 11:30:41 -08:00 · 2025-11-07 11:30:41 -08:00
238 changed files with 18395 additions and 9590 deletions
--- a/.github/workflows/publish.yaml
+++ b/.github/workflows/publish.yaml
@@ -5,25 +5,25 @@ on:
    inputs:
      gitref:
        type: string
-        description: "what git tag to build (e.g. v0.0.74)"
+        description: 'what git tag to build (e.g. v0.0.74)'
        required: true

 jobs:
  build:
-    name: "Build and upload wheels"
+    name: 'Build and upload wheels'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repo
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.inputs.gitref }}
-      
+
      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
-          version: "latest"
+          version: 'latest'
      - name: Set up Python
-        run: uv python install 3.10
+        run: uv python install 3.12
      - name: Install development dependencies
        run: uv sync --group dev
      - name: Build project
@@ -35,9 +35,9 @@ jobs:
          path: ./dist

  publish-to-pypi:
-    name: "Publish to PyPI"
+    name: 'Publish to PyPI'
    runs-on: ubuntu-latest
-    needs: [ build ]
+    needs: [build]
    environment:
      name: pypi
      url: https://pypi.org/p/pipecat-ai
@@ -56,12 +56,12 @@ jobs:
          print-hash: true

  publish-to-test-pypi:
-    name: "Publish to Test PyPI"
+    name: 'Publish to Test PyPI'
    runs-on: ubuntu-latest
-    needs: [ build ]
+    needs: [build]
    environment:
      name: testpypi
-      url: https://pypi.org/p/pipecat-ai
+      url: https://test.pypi.org/p/pipecat-ai
    permissions:
      id-token: write
    steps:
@@ -70,7 +70,7 @@ jobs:
        with:
          name: wheels
          path: ./dist
-      - name: Publish to PyPI
+      - name: Publish to Test PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          verbose: true
--- a/.github/workflows/publish_test.yaml
+++ b/.github/workflows/publish_test.yaml
@@ -4,7 +4,7 @@ on: workflow_dispatch

 jobs:
  build:
-    name: "Build and upload wheels"
+    name: 'Build and upload wheels'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repo
@@ -15,9 +15,9 @@ jobs:
      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
-          version: "latest"
+          version: 'latest'
      - name: Set up Python
-        run: uv python install 3.10
+        run: uv python install 3.12
      - name: Install development dependencies
        run: uv sync --group dev
      - name: Build project
@@ -29,12 +29,12 @@ jobs:
          path: ./dist

  publish-to-test-pypi:
-    name: "Publish to Test PyPI"
+    name: 'Publish to Test PyPI'
    runs-on: ubuntu-latest
    needs: [build]
    environment:
      name: testpypi
-      url: https://pypi.org/p/pipecat-ai
+      url: https://test.pypi.org/p/pipecat-ai
    permissions:
      id-token: write
    steps:
@@ -43,7 +43,7 @@ jobs:
        with:
          name: wheels
          path: ./dist
-      - name: Publish to PyPI
+      - name: Publish to Test PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          verbose: true
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,717 @@ All notable changes to **Pipecat** will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

+## [0.0.93] - 2025-11-07
+
+### Added
+
+- Added support for Sarvam Speech-to-Text service (`SarvamSTTService`) with
+  streaming WebSocket support for `saarika` (STT) and `saaras` (STT-translate)
+  models.
+
+- Added support for passing in a `ToolsSchema` in lieu of a list of provider-
+  specific dicts when initializing `OpenAIRealtimeLLMService` or when updating
+  it using `LLMUpdateSettingsFrame`.
+
+- Added `TransportParams.audio_out_silence_secs`, which specifies how many
+  seconds of silence to output when an `EndFrame` reaches the output
+  transport. This can help ensure that all audio data is fully delivered to
+  clients.
+
+- Added new `FrameProcessor.broadcast_frame()` method. This will push two
+  instances of a given frame class, one upstream and the other downstream.
+
+  ```python
+  await self.broadcast_frame(UserSpeakingFrame)
+  ```
+
+- Added `MetricsLogObserver` for logging performance metrics from `MetricsFrame`
+  instances. Supports filtering via `include_metrics` parameter to control which
+  metrics types are logged (TTFB, processing time, LLM token usage, TTS usage,
+  smart turn metrics).
+
+- Added `pronunciation_dictionary_locators` to `ElevenLabsTTSService` and
+  `ElevenLabsHttpTTSService`.
+
+- Added support for loading external observers. You can now register custom
+  pipeline observers by setting the `PIPECAT_OBSERVER_FILES` environment
+  variable. This variable should contain a colon-separated list of Python files
+  (e.g. `export PIPECAT_OBSERVER_FILES="observer1.py:observer2.py:..."`). Each
+  file must define a function with the following signature:
+
+  ```python
+  async def create_observers(task: PipelineTask) -> Iterable[BaseObserver]:
+      ...
+  ```
+
+- Added support for new sonic-3 languages in `CartesiaTTSService` and
+  `CartesiaHttpTTSService`.
+
+- `EndFrame` and `EndTaskFrame` have an optional `reason` field to indicate why
+  the pipeline is being ended.
+
+- `CancelFrame` and `CancelTaskFrame` have an optional `reason` field to
+  indicate why the pipeline is being canceled. This can be also specified when
+  you cancel a task with `PipelineTask.cancel(reason="cancellation reason")`.
+
+- Added `include_prob_metrics` parameter to Whisper STT services to enable access
+  to probability metrics from transcription results.
+
+- Added utility functions `extract_whisper_probability()`,
+  `extract_openai_gpt4o_probability()`, and `extract_deepgram_probability()` to
+  extract probability metrics from `TranscriptionFrame` objects for Whisper-based,
+  OpenAI GPT-4o-transcribe, and Deepgram STT services respectively.
+
+- Added `LLMSwitcher.register_direct_function()`. It works much like
+  `LLMSwitcher.register_function()` in that it's a shorthand for registering
+  functions on all LLMs in the switcher, but for direct functions.
+
+- Added `LLMSwitcher.register_direct_function()`. It works much like
+  `LLMSwitcher.register_function()` in that it's a shorthand for registering
+  a function on all LLMs in the switcher, except this new method takes a direct
+  function (a `FunctionSchema`-less function).
+
+- Added `MCPClient.get_tools_schema()` and `MCPClient.register_tools_schema()`
+  as a two-step alternative to `MCPClient.register_tools()`, to allow users to
+  pass MCP tools to, say, `GeminiLiveLLMService` (as well as other
+  speech-to-speech services) in the constructor.
+
+- Added support for passing in an `LLMSwicher` to `MCPClient.register_tools()`
+  (as well as the new `MCPClient.register_tools_schema()`).
+
+- Added `cpu_count` parameter to `LocalSmartTurnAnalyzerV3`. This is set to `1`
+  by default for more predictable performance on low-CPU systems.
+
+### Changed
+
+- Updated `simli-ai` to 0.1.25.
+
+- Improved `concatenate_aggregated_text()` to one word outputs from OpenAI
+  Realtime and Gemini Live. Text fragments are now correctly concatenated
+  without spaces when these patterns are detected.
+
+- `STTMuteFilter` no longer sends `STTMuteFrame` to the STT service. The filter
+  now blocks frames locally without instructing the STT service to stop
+  processing audio. This prevents inactivity-related errors (such as 409 errors
+  from Google STT) while maintaining the same muting behavior at the application
+  level. Important: The STTMuteFilter should be placed _after_ the STT service
+  itself.
+
+- Improved `GoogleSTTService` error handling to properly catch gRPC `Aborted`
+  exceptions (corresponding to 409 errors) caused by stream inactivity. These
+  exceptions are now logged at DEBUG level instead of ERROR level, since they
+  indicate expected behavior when no audio is sent for 10+ seconds (e.g., during
+  long silences or when audio input is blocked). The service automatically
+  reconnects when this occurs.
+
+- Bumped the `fastapi` dependency's upperbound to `<0.122.0`.
+
+- Updated the default model for `GoogleVertexLLMService` to `gemini-2.5-flash`.
+
+- Updated the `GoogleVertexLLMService` to use the `GoogleLLMService` as a base
+  class instead of the `OpenAILLMService`.
+
+- Updated STT and TTS services to pass through unverified language codes with a
+  warning instead of returning None. This allows developers to use newly
+  supported languages before Pipecat's service classes are updated, while still
+  providing guidance on verified languages.
+
+### Removed
+
+- Removed `needs_mcp_alternate_schema()` from `LLMService`. The mechanism that
+  relied on it went away.
+
+### Fixed
+
+- Restore backwards compatibility for vision/image features (broken in 0.0.92)
+  when using non-universal context and assistant aggregators.
+
+- Fixed `DeepgramSTTService._disconnect()` to properly await `is_connected()`
+  method call, which is an async coroutine in the Deepgram SDK.
+
+- Fixed an issue where the `SmallWebRTCRequest` dataclass in runner would scrub
+  arbitrary request data from client due to camelCase typing. This fixes data
+  passthrough for JS clients where `APIRequest` is used.
+
+- Fixed a bug in `GeminiLiveLLMService` where in some circumstances it wouldn't
+  respond after a tool call.
+
+- Fixed `GeminiLiveLLMService` session resumption after a connection timeout.
+
+- `GeminiLiveLLMService` now properly supports context-provided system
+  instruction and tools.
+
+- Fixed `GoogleLLMService` token counting to avoid double-counting tokens when
+  Gemini sends usage metadata across multiple streaming chunks.
+
+## [0.0.92] - 2025-10-31 🎃 "The Haunted Edition" 👻
+
+### Added
+
+- Added a new `DeepgramHttpTTSService`, which delivers a meaningful reduction
+  in latency when compared to the `DeepgramTTSService`.
+
+- Add support for `speaking_rate` input parameter in `GoogleHttpTTSService`.
+
+- Added `enable_speaker_diarization` and `enable_language_identification` to
+  `SonioxSTTService`.
+
+- Added `SpeechmaticsTTSService`, which uses Speechmatic's TTS API. Updated
+  examples 07a\* to use the new TTS service.
+
+- Added support for including images or audio to LLM context messages using
+  `LLMContext.create_image_message()` or `LLMContext.create_image_url_message()`
+  (not all LLMs support URLs) and `LLMContext.create_audio_message()`. For
+  example, when creating `LLMMessagesAppendFrame`:
+
+  ```python
+  message = LLMContext.create_image_message(image=..., size= ...)
+  await self.push_frame(LLMMessagesAppendFrame(messages=[message], run_llm=True))
+  ```
+
+- New event handlers for the `DeepgramFluxSTTService`: `on_start_of_turn`,
+  `on_turn_resumed`, `on_end_of_turn`, `on_eager_end_of_turn`, `on_update`.
+
+- Added `generation_config` parameter support to `CartesiaTTSService` and
+  `CartesiaHttpTTSService` for Cartesia Sonic-3 models. Includes a new
+  `GenerationConfig` class with `volume` (0.5-2.0), `speed` (0.6-1.5),
+  and `emotion` (60+ options) parameters for fine-grained speech generation
+  control.
+
+- Expanded support for univeral `LLMContext` to `OpenAIRealtimeLLMService`.
+  As a reminder, the context-setup pattern when using `LLMContext` is:
+
+  ```python
+  context = LLMContext(messages, tools)
+  context_aggregator = LLMContextAggregatorPair(context)
+  ```
+
+  (Note that even though `OpenAIRealtimeLLMService` now supports the universal
+  `LLMContext`, it is not meant to be swapped out for another LLM service at
+  runtime with `LLMSwitcher`.)
+
+  Note: `TranscriptionFrame`s and `InterimTranscriptionFrame`s now go upstream
+  from `OpenAIRealtimeLLMService`, so if you're using `TranscriptProcessor`,
+  say, you'll want to adjust accordingly:
+
+  ```python
+  pipeline = Pipeline(
+    [
+      transport.input(),
+      context_aggregator.user(),
+
+      # BEFORE
+      llm,
+      transcript.user(),
+
+      # AFTER
+      transcript.user(),
+      llm,
+
+      transport.output(),
+      transcript.assistant(),
+      context_aggregator.assistant(),
+    ]
+  )
+  ```
+
+  Also worth noting: whether or not you use the new context-setup pattern with
+  `OpenAIRealtimeLLMService`, some types have changed under the hood:
+
+  ```python
+  ## BEFORE:
+
+  # Context aggregator type
+  context_aggregator: OpenAIContextAggregatorPair
+
+  # Context frame type
+  frame: OpenAILLMContextFrame
+
+  # Context type
+  context: OpenAIRealtimeLLMContext
+  # or
+  context: OpenAILLMContext
+
+  ## AFTER:
+
+  # Context aggregator type
+  context_aggregator: LLMContextAggregatorPair
+
+  # Context frame type
+  frame: LLMContextFrame
+
+  # Context type
+  context: LLMContext
+  ```
+
+  Also note that `RealtimeMessagesUpdateFrame` and
+  `RealtimeFunctionCallResultFrame` have been deprecated, since they're no
+  longer used by `OpenAIRealtimeLLMService`. OpenAI Realtime now works more
+  like other LLM services in Pipecat, relying on updates to its context, pushed
+  by context aggregators, to update its internal state. Listen for
+  `LLMContextFrame`s for context updates.
+
+  Finally, `LLMTextFrame`s are no longer pushed from `OpenAIRealtimeLLMService`
+  when it's configured with `output_modalities=['audio']`. If you need
+  to process its output, listen for `TTSTextFrame`s instead.
+
+- Expanded support for universal `LLMContext` to `GeminiLiveLLMService`.
+  As a reminder, the context-setup pattern when using `LLMContext` is:
+
+  ```python
+  context = LLMContext(messages, tools)
+  context_aggregator = LLMContextAggregatorPair(context)
+  ```
+
+  (Note that even though `GeminiLiveLLMService` now supports the universal
+  `LLMContext`, it is not meant to be swapped out for another LLM service at
+  runtime with `LLMSwitcher`.)
+
+  Worth noting: whether or not you use the new context-setup pattern with
+  `GeminiLiveLLMService`, some types have changed under the hood:
+
+  ```python
+  ## BEFORE:
+
+  # Context aggregator type
+  context_aggregator: GeminiLiveContextAggregatorPair
+
+  # Context frame type
+  frame: OpenAILLMContextFrame
+
+  # Context type
+  context: GeminiLiveLLMContext
+  # or
+  context: OpenAILLMContext
+
+  ## AFTER:
+
+  # Context aggregator type
+  context_aggregator: LLMContextAggregatorPair
+
+  # Context frame type
+  frame: LLMContextFrame
+
+  # Context type
+  context: LLMContext
+  ```
+
+  Also note that `LLMTextFrame`s are no longer pushed from `GeminiLiveLLMService`
+  when it's configured with `modalities=GeminiModalities.AUDIO`. If you need
+  to process its output, listen for `TTSTextFrame`s instead.
+
+### Changed
+
+- The development runner's `/start` endpoint now supports passing
+  `dailyRoomProperties` and `dailyMeetingTokenProperties` in the request body
+  when `createDailyRoom` is true. Properties are validated against the
+  `DailyRoomProperties` and `DailyMeetingTokenProperties` types respectively
+  and passed to Daily's room and token creation APIs.
+
+- `UserImageRawFrame` new fields `append_to_context` and `text`. The
+  `append_to_context` field indicates if this image and text should be added to
+  the LLM context (by the LLM assistant aggregator). The `text` field, if set,
+  might also guide the LLM or the vision service on how to analyze the image.
+
+- `UserImageRequestFrame` new fiels `append_to_context` and `text`. Both fields
+  will be used to set the same fields on the captured `UserImageRawFrame`.
+
+- `UserImageRequestFrame` don't require function call name and ID anymore.
+
+- Updated `MoondreamService` to process `UserImageRawFrame`.
+
+- `VisionService` expects `UserImageRawFrame` in order to analyze images.
+
+- `DailyTransport` triggers `on_error` event if transcription can't be started
+  or stopped.
+
+- `DailyTransport` updates: `start_dialout()` now returns two values:
+  `session_id` and `error`. `start_recording()` now returns two values:
+  `stream_id` and `error`.
+
+- Updated `daily-python` to 0.21.0.
+
+- `SimliVideoService` now accepts `api_key` and `face_id` parameters directly,
+  with optional `params` for `max_session_length` and `max_idle_time`
+  configuration, aligning with other Pipecat service patterns.
+
+- Updated the default model to `sonic-3` for `CartesiaTTSService` and
+  `CartesiaHttpTTSService`.
+
+- `FunctionFilter` now has a `filter_system_frames` arg, which controls whether
+  or not SystemFrames are filtered.
+
+- Upgraded `aws_sdk_bedrock_runtime` to v0.1.1 to resolve potential CPU issues
+  when running `AWSNovaSonicLLMService`.
+
+### Deprecated
+
+- The `expect_stripped_words` parameter of `LLMAssistantAggregatorParams` is
+  ignored when used with the newer `LLMAssistantAggregator`, which now handles
+  word spacing automatically.
+
+- `LLMService.request_image_frame()` is deprecated, push a
+  `UserImageRequestFrame` instead.
+
+- `UserResponseAggregator` is deprecated and will be removed in a future version.
+
+- The `send_transcription_frames` argument to `OpenAIRealtimeLLMService` is
+  deprecated. Transcription frames are now always sent. They go upstream, to be
+  handled by the user context aggregator. See "Added" section for details.
+
+- Types in `pipecat.services.openai.realtime.context` and
+  `pipecat.services.openai.realtime.frames` are deprecated, as they're no
+  longer used by `OpenAIRealtimeLLMService`. See "Added" section for details.
+
+- `SimliVideoService` `simli_config` parameter is deprecated. Use `api_key` and
+  `face_id` parameters instead.
+
+### Removed
+
+- Removed `enable_non_final_tokens` and `max_non_final_tokens_duration_ms` from
+  `SonioxSTTService`.
+
+- Removed the `aiohttp_session` arg from `SarvamTTSService` as it's no longer
+  used.
+
+### Fixed
+
+- Fixed a `PipelineTask` issue that was causing an idle timeout for frames that
+  were being generated but not reaching the end of the pipeline. Since the exact
+  point when frames are discarded is unknown, we now monitor pipeline frames
+  using an observer. If the observer detects frames are being generated, it will
+  prevent the pipeline from being considered idle.
+
+- Fixed an issue in `HumeTTSService` that was only using Octave 2, which does
+  not support the `description` field. Now, if a description is provided, it
+  switches to Octave 1.
+
+- Fixed an issue where `DailyTransport` would timeout prematurely on join and on
+  leave.
+
+- Fixed an issue in the runner where starting a DailyTransport room via
+  `/start` didn't support using the `DAILY_SAMPLE_ROOM_URL` env var.
+
+- Fixed an issue in `ServiceSwitcher` where the `STTService`s would result in
+  all STT services producing `TranscriptionFrame`s.
+
+### Other
+
+- Updated all vision 12-series foundational examples to load images from a file.
+
+- Added 14-series video examples for different services. These new examples
+  request an image from the user camera through a function call.
+
+## [0.0.91] - 2025-10-21
+
+### Added
+
+- It is now possible to start a bot from the `/start` endpoint when using the
+  runner Daily's transport. This follows the Pipecat Cloud format with
+  `createDailyRoom` and `body` fields in the POST request body.
+
+- Added an ellipsis character (`…`) to the end of sentence detection in the
+  string utils.
+
+- Expanded support for universal `LLMContext` to `AWSNovaSonicLLMService`.
+  As a reminder, the context-setup pattern when using `LLMContext` is:
+
+  ```python
+  context = LLMContext(messages, tools)
+  context_aggregator = LLMContextAggregatorPair(context)
+  ```
+
+  (Note that even though `AWSNovaSonicLLMService` now supports the universal
+  `LLMContext`, it is not meant to be swapped out for another LLM service at
+  runtime with `LLMSwitcher`.)
+
+  Worth noting: whether or not you use the new context-setup pattern with
+  `AWSNovaSonicLLMService`, some types have changed under the hood:
+
+  ```python
+  ## BEFORE:
+
+  # Context aggregator type
+  context_aggregator: AWSNovaSonicContextAggregatorPair
+
+  # Context frame type
+  frame: OpenAILLMContextFrame
+
+  # Context type
+  context: AWSNovaSonicLLMContext
+  # or
+  context: OpenAILLMContext
+
+  ## AFTER:
+
+  # Context aggregator type
+  context_aggregator: LLMContextAggregatorPair
+
+  # Context frame type
+  frame: LLMContextFrame
+
+  # Context type
+  context: LLMContext
+  ```
+
+- Added support for `bulbul:v3` model in `SarvamTTSService` and
+  `SarvamHttpTTSService`.
+
+- Added `keyterms_prompt` parameter to `AssemblyAIConnectionParams`.
+
+- Added `speech_model` parameter to `AssemblyAIConnectionParams` to access the
+  multilingual model.
+
+- Added support for trickle ICE to the `SmallWebRTCTransport`.
+
+- Added support for updating `OpenAITTSService` settings (`instructions` and
+  `speed`) at runtime via `TTSUpdateSettingsFrame`.
+
+- Added `--whatsapp` flag to runner to better surface WhatsApp transport logs.
+
+- Added `on_connected` and `on_disconnected` events to TTS and STT
+  websocket-based services.
+
+- Added an `aggregate_sentences` arg in `ElevenLabsHttpTTSService`, where the
+  default value is True.
+
+- Added a `room_properties` arg to the Daily runner's `configure()` method,
+  allowing `DailyRoomProperties` to be provided.
+
+- The runner `--folder` argument now supports downloading files from
+  subdirectories.
+
+### Changed
+
+- `RunnerArguments` now include the `body` field, so there's no need to add it
+  to subclasses. Also, all `RunnerArguments` fields are now keyword-only.
+
+- `CartesiaSTTService` now inherits from `WebsocketSTTService`.
+
+- Package upgrades:
+
+  - `daily-python` upgraded to 0.20.0.
+  - `openai` upgraded to support up to 2.x.x.
+  - `openpipe` upgraded to support up to 5.x.x.
+
+- `SpeechmaticsSTTService` updated dependencies for `speechmatics-rt>=0.5.0`.
+
+### Deprecated
+
+- The `send_transcription_frames` argument to `AWSNovaSonicLLMService` is
+  deprecated. Transcription frames are now always sent. They go upstream, to be
+  handled by the user context aggregator. See "Added" section for details.
+
+- Types in `pipecat.services.aws.nova_sonic.context` are deprecated, as they're
+  no longer used by `AWSNovaSonicLLMService`. See "Added" section for
+  details.
+
+### Fixed
+
+- Fixed an issue where the `RTVIProcessor` was sending duplicate
+  `UserStartedSpeakingFrame` and `UserStoppedSpeakingFrame` messages.
+
+- Fixed an issue in `AWSBedrockLLMService` where both `temperature` and `top_p`
+  were always sent together, causing conflicts with models like Claude Sonnet 4.5
+  that don't allow both parameters simultaneously. The service now only includes
+  inference parameters that are explicitly set, and `InputParams` defaults have
+  been changed to `None` to rely on AWS Bedrock's built-in model defaults.
+
+- Fixed an issue in `RivaSegmentedSTTService` where a runtime error occurred due
+  to a mismatch in the `_handle_transcription` method's signature.
+
+- Fixed multiple pipeline task cancellation issues. `asyncio.CancelledError` is
+  now handled properly in `PipelineTask` making it possible to cancel an asyncio
+  task that it's executing a `PipelineRunner` cleanly. Also,
+  `PipelineTask.cancel()` does not block anymore waiting for the `CancelFrame`
+  to reach the end of the pipeline (going back to the behavior in < 0.0.83).
+
+- Fixed an issue in `ElevenLabsTTSService` and `ElevenLabsHttpTTSService` where
+  the Flash models would split words, resulting in a space being inserted
+  between words.
+
+- Fixed an issue where audio filters' `stop()` would not be called when using
+  `CancelFrame`.
+
+- Fixed an issue in `ElevenLabsHttpTTSService`, where
+  `apply_text_normalization` was incorrectly set as a query parameter. It's now
+  being added as a request parameter.
+
+- Fixed an issue where `RimeHttpTTSService` and `PiperTTSService` could generate
+  incorrectly 16-bit aligned audio frames, potentially leading to internal
+  errors or static audio.
+
+- Fixed an issue in `SpeechmaticsSTTService` where `AdditionalVocabEntry` items
+  needed to have `sounds_like` for the session to start.
+
+### Other
+
+- Added foundational example `47-sentry-metrics.py`, demonstrating how to use the
+  `SentryMetrics` processor.
+
+- Added foundational example `14x-function-calling-openpipe.py`.
+
+## [0.0.90] - 2025-10-10
+
+### Added
+
+- Added audio filter `KrispVivaFilter` using the Krisp VIVA SDK.
+
+- Added `--folder` argument to the runner, allowing files saved in that folder
+  to be downloaded from `http://HOST:PORT/file/FILE`.
+
+- Added `GeminiLiveVertexLLMService`, for accessing Gemini Live via Google
+  Vertex AI.
+
+- Added some new configuration options to `GeminiLiveLLMService`:
+
+  - `thinking`
+  - `enable_affective_dialog`
+  - `proactivity`
+
+  Note that these new configuration options require using a newer model than
+  the default, like "gemini-2.5-flash-native-audio-preview-09-2025". The last
+  two require specifying `http_options=HttpOptions(api_version="v1alpha")`.
+
+- Added `on_pipeline_error` event to `PipelineTask`. This event will get fired
+  when an `ErrorFrame` is pushed (use `FrameProcessor.push_error()`).
+
+  ```python
+  @task.event_handler("on_pipeline_error")
+  async def on_pipeline_error(task: PipelineTask, frame: ErrorFrame):
+      ...
+  ```
+
+- Added a `service_tier` `InputParam` to the `BaseOpenAILLMService`. This
+  parameter can influence the latency of the response. For example `"priority"`
+  will result in faster completions, but in exchange for a higher price.
+
+### Changed
+
+- Updated `GeminiLiveLLMService` to use the `google-genai` library rather than
+  use WebSockets directly.
+
+### Deprecated
+
+- `LivekitFrameSerializer` is now deprecated. Use `LiveKitTransport` instead.
+
+- `pipecat.service.openai_realtime` is now deprecated, use
+  `pipecat.services.openai.realtime` instead or
+  `pipecat.services.azure.realtime` for Azure Realtime.
+
+- `pipecat.service.aws_nova_sonic` is now deprecated, use
+  `pipecat.services.aws.nova_sonic` instead.
+
+- `GeminiMultimodalLiveLLMService` is now deprecated, use
+  `GeminiLiveLLMService`.
+
+### Fixed
+
+- Fixed a `GoogleVertexLLMService` issue that would generate an error if no
+  token information was returned.
+
+- `GeminiLiveLLMService` will now end gracefully (i.e. after the bot has
+  finished) upon receiving an `EndFrame`.
+
+- `GeminiLiveLLMService` will try to seamlessly reconnect when it loses its
+  connection.
+
+## [0.0.89] - 2025-10-07
+
+### Fixed
+
+- Reverted a change introduced in 0.0.88 that was causing pipelines to be frozen
+  when using interruption strategies and processors that block interruption
+  frames (e.g. `STTMuteFilter`).
+
+## [0.0.88] - 2025-10-07
+
+### Added
+
+- Added support for Nano Banana models to `GoogleLLMService`. For example, you
+  can now use the `gemini-2.5-flash-image` model to generate images.
+
+- Added `HumeTTSService` for text-to-speech synthesis using Hume AI's expressive
+  voice models. Provides high-quality, emotionally expressive speech synthesis
+  with support for various voice models. Includes example in
+  `examples/foundational/07ad-interruptible-hume.py`. Use with:
+  `uv pip install pipecat-ai[hume]`.
+
+### Changed
+
+- Updated default `GoogleLLMService` model to `gemini-2.5-flash`.
+
+### Deprecated
+
+- PlayHT is shutting down their API on December 31st, 2025. As a result,
+  `PlayHTTTSService` and `PlayHTHttpTTSService` are deprecated and will be
+  removed in a future version.
+
+### Fixed
+
+- Fixed an issue with `AWSNovaSonicLLMService` where the client wouldn't
+  connect due to a breaking change in the AWS dependency chain.
+
+- `PermissionError` is now caught if NLTK's `punkt_tab` can't be downloaded.
+
+- Fixed an issue that would cause wrong user/assistant context ordering when
+  using interruption strategies.
+
+- Fixed RTVI incoming message handling, broken in 0.0.87.
+
+## [0.0.87] - 2025-10-02
+
+### Added
+
+- Added `WebsocketSTTService` base class for websocket-based STT services.
+  Combines STT functionality with websocket connectivity, providing automatic
+  error handling and reconnection capabilities with exponential backoff.
+
+- Added `DeepgramFluxSTTService` for real-time speech recognition using
+  Deepgram's Flux WebSocket API. Flux understands conversational flow and
+  automatically handles turn-taking.
+
+- Added RTVI messages for user/bot audio levels and system logs.
+
+- Include OpenAI-based LLM services cached tokens to `MetricsFrame`.
+
+### Changed
+
+- Updated the default model for `AnthropicLLMService` to
+  `claude-sonnet-4-5-20250929`.
+
+### Deprecated
+
+- `DailyTransportMessageFrame` and `DailyTransportMessageUrgentFrame` are
+  deprecated, use `DailyOutputTransportMessageFrame` and
+  `DailyOutputTransportMessageUrgentFrame` respectively instead.
+
+- `LiveKitTransportMessageFrame` and `LiveKitTransportMessageUrgentFrame` are
+  deprecated, use `LiveKitOutputTransportMessageFrame` and
+  `LiveKitOutputTransportMessageUrgentFrame` respectively instead.
+
+- `TransportMessageFrame` and `TransportMessageUrgentFrame` are deprecated, use
+  `OutputTransportMessageFrame` and `OutputTransportMessageUrgentFrame`
+  respectively instead.
+
+- `InputTransportMessageUrgentFrame` is deprecated, use
+  `InputTransportMessageFrame` instead.
+
+- `DailyUpdateRemoteParticipantsFrame` is deprecated and will be removed in a
+  future version. Instead, create your own custom frame and handle it in the
+  `@transport.output().event_handler("on_after_push_frame")` event handler or a
+  custom processor.
+
+## Fixed
+
+- Fixed an issue in `AWSBedrockLLMService` where timeout exceptions weren't
+  being detected.
+
+- Fixed a `PipelineTask` issue that could prevent the application to exit if
+  `task.cancel()` was called when the task was already finished.
+
+- Fixed an issue where local SmartTurn was not being ran in a separate thread.
+
 ## [0.0.86] - 2025-09-24

 ### Added
@@ -835,6 +1546,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Added

+- Added `SonioxSTTService` using Soniox's STT websocket API.
+
 - Added `enable_emulated_vad_interruptions` to `LLMUserAggregatorParams`.
  When user speech is emulated (e.g. when a transcription is received but
  VAD doesn't detect speech), this parameter controls whether the emulated
@@ -1326,7 +2039,7 @@ quality and critical bugs impacting `ParallelPipelines` functionality.**
 - Added `session_token` parameter to `AWSNovaSonicLLMService`.

 - Added Gemini Multimodal Live File API for uploading, fetching, listing, and
-  deleting files. See `26f-gemini-multimodal-live-files-api.py` for example usage.
+  deleting files. See `26f-gemini-live-files-api.py` for example usage.

 ### Changed

@@ -3332,7 +4045,7 @@ stt = DeepgramSTTService(..., live_options=LiveOptions(model="nova-2-general"))
 - Added the new modalities option and helper function to set Gemini output
  modalities.

- Added `examples/foundational/26d-gemini-multimodal-live-text.py` which is
+- Added `examples/foundational/26d-gemini-live-text.py` which is
  using Gemini as TEXT modality and using another TTS provider for TTS process.

 ### Changed
@@ -3519,9 +4232,9 @@ stt = DeepgramSTTService(..., live_options=LiveOptions(model="nova-2-general"))
 - Added new foundational examples for `GeminiMultimodalLiveLLMService`:

  - `26-gemini-multimodal-live.py`
-  - `26a-gemini-multimodal-live-transcription.py`
-  - `26b-gemini-multimodal-live-video.py`
-  - `26c-gemini-multimodal-live-video.py`
+  - `26a-gemini-live-transcription.py`
+  - `26b-gemini-live-video.py`
+  - `26c-gemini-live-video.py`

 - Added `SimliVideoService`. This is an integration for Simli AI avatars.
  (see https://www.simli.com)
--- a/COMMUNITY_INTEGRATIONS.md
+++ b/COMMUNITY_INTEGRATIONS.md
@@ -0,0 +1,336 @@
+# Community Integrations Guide
+
+Pipecat welcomes community-maintained integrations! As our ecosystem grows, we've established a process for any developer to create and maintain their own service integrations while ensuring discoverability for the Pipecat community.
+
+## Overview
+
+**What we support:** Community-maintained integrations that live in separate repositories and are maintained by their authors.
+
+**What we don't do:** The Pipecat team does not code review, test, or maintain community integrations. We provide guidance and list approved integrations for discoverability.
+
+**Why this approach:** This allows the community to move quickly while keeping the Pipecat core team focused on maintaining the framework itself.
+
+## Submitting your Integration
+
+To be listed as an official community integration, follow these steps:
+
+### Step 1: Build Your Integration
+
+Create your integration following the patterns and examples shown in the "Integration Patterns and Examples" section below.
+
+### Step 2: Set Up Your Repository
+
+Your repository must contain these components:
+
+- **Source code** - Complete implementation following Pipecat patterns
+- **Foundational example** - Single file example showing basic usage (see [Pipecat examples](https://github.com/pipecat-ai/pipecat/tree/main/examples/foundational))
+- **README.md** - Must include:
+
+  - Introduction and explanation of your integration
+  - Installation instructions
+  - Usage instructions with Pipecat Pipeline
+  - How to run your example
+  - Pipecat version compatibility (e.g., "Tested with Pipecat v0.0.86")
+  - Company attribution: If you work for the company providing the service, please mention this in your README. This helps build confidence that the integration will be actively maintained.
+
+- **LICENSE** - Permissive license (BSD-2 like Pipecat, or equivalent open source terms)
+- **Code documentation** - Source code with docstrings (we recommend following [Pipecat's docstring conventions](https://github.com/pipecat-ai/pipecat/blob/main/CONTRIBUTING.md#docstring-conventions))
+- **Changelog** - Maintain a changelog for version updates
+
+### Step 3: Join Discord
+
+Join our Discord: https://discord.gg/pipecat
+
+### Step 4: Submit for Listing
+
+Submit a pull request to add your integration to our [Community Integrations documentation page](https://docs.pipecat.ai/server/services/community-integrations).
+
+**To submit:**
+
+1. Fork the [Pipecat docs repository](https://github.com/pipecat-ai/docs)
+2. Edit the file `server/services/community-integrations.mdx`
+3. Add your integration to the appropriate service category table with:
+   - Service name
+   - Link to your repository
+   - Maintainer GitHub username(s)
+4. Include a link to your demo video (approx 30-60 seconds) in your PR description showing:
+   - Core functionality of your integration
+   - Handling of an interruption (if applicable to service type)
+5. Submit your pull request
+
+Once your PR is submitted, post in the `#community-integrations` Discord channel to let us know.
+
+## Integration Patterns and Examples
+
+### STT (Speech-to-Text) Services
+
+#### Websocket-based Services
+
+**Base class:** `STTService`
+
+**Examples:**
+
+- [DeepgramSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/deepgram/stt.py)
+- [SpeechmaticsSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/speechmatics/stt.py)
+
+#### File-based Services
+
+**Base class:** `SegmentedSTTService`
+
+**Examples:**
+
+- [RivaSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/riva/stt.py)
+- [FalSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/fal/stt.py)
+
+#### Key requirements:
+
+- STT services should push `InterimTranscriptionFrames` and `TranscriptionFrames`
+- If confidence values are available, filter for values >50% confidence
+
+### LLM (Large Language Model) Services
+
+#### OpenAI-Compatible Services
+
+**Base class:** `OpenAILLMService`
+
+**Examples:**
+
+- [AzureLLMService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/azure/llm.py)
+- [GrokLLMService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/grok/llm.py) - Shows overriding the base class where needed
+
+#### Non-OpenAI Compatible Services
+
+**Requires:** Full implementation
+
+**Examples:**
+
+- [AnthropicLLMService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/anthropic/llm.py)
+- [GoogleLLMService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/google/llm.py)
+
+#### Key requirements:
+
+- **Frame sequence:** Output must follow this frame sequence pattern:
+
+  - `LLMFullResponseStartFrame` - Signals the start of an LLM response
+  - `LLMTextFrame` - Contains LLM content, typically streamed as tokens
+  - `LLMFullResponseEndFrame` - Signals the end of an LLM response
+
+- **Context aggregation:** Implement context aggregation to collect user and assistant content:
+  - Aggregators come in pairs with a `user()` instance and `assistant()` instance
+  - Context must adhere to the `LLMContext` universal format
+  - Aggregators should handle adding messages, function calls, and images to the context
+
+### TTS (Text-to-Speech) Services
+
+#### AudioContextWordTTSService
+
+**Use for:** Websocket-based services supporting word/timestamp alignment
+
+**Example:**
+
+- [CartesiaTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/cartesia/tts.py)
+
+#### InterruptibleTTSService
+
+**Use for:** Websocket-based services without word/timestamp alignment, requiring disconnection on interruption
+
+**Example:**
+
+- [SarvamTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/sarvam/tts.py)
+
+#### WordTTSService
+
+**Use for:** HTTP-based services supporting word/timestamp alignment
+
+**Example:**
+
+- [ElevenLabsHttpTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/elevenlabs/tts.py)
+
+#### TTSService
+
+**Use for:** HTTP-based services without word/timestamp alignment
+
+**Example:**
+
+- [GoogleHttpTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/google/tts.py)
+
+#### Key requirements:
+
+- For websocket services, use asyncio WebSocket implementation (required for v13+ support)
+- Handle idle service timeouts with keepalives
+- TTSServices push both audio (`TTSRawAudioFrame`) and text (`TTSTextFrame`) frames
+
+### Telephony Serializers
+
+Pipecat supports telephony provider integration using websocket connections to exchange MediaStreams. These services use a FrameSerializer to serialize and deserialize inputs from the FastAPIWebsocketTransport.
+
+**Examples:**
+
+- [Twilio](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/serializers/twilio.py)
+- [Telnyx](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/serializers/telnyx.py)
+
+#### Key requirements:
+
+- Include hang-up functionality using the provider's native API, ideally using `aiohttp`
+- Support DTMF (dual-tone multi-frequency) events if the provider supports them:
+  - Deserialize DTMF events from the provider's protocol to `InputDTMFFrame`
+  - Use `KeypadEntry` enum for valid keypad entries (0-9, \*, #, A-D)
+  - Handle invalid DTMF digits gracefully by returning `None`
+
+### Image Generation Services
+
+**Base class:** `ImageGenService`
+
+**Examples:**
+
+- [FalImageGenService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/fal/image.py)
+- [GoogleImageGenService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/google/image.py)
+
+#### Key requirements:
+
+- Must implement `run_image_gen` method returning an `AsyncGenerator`
+
+### Vision Services
+
+Vision services process images and provide analysis such as descriptions, object detection, or visual question answering.
+
+**Base class:** `VisionService`
+
+**Example:**
+
+- [MoondreamVisionService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/moondream/vision.py)
+
+#### Key requirements:
+
+- Must implement `run_vision` method that takes an `LLMContext` and returns an `AsyncGenerator[Frame, None]`
+- The method processes the latest image in the context and yields frames with analysis results
+- Typically yields `TextFrame` objects containing descriptions or answers
+
+## Implementation Guidelines
+
+### Naming Conventions
+
+- **STT:** `VendorSTTService`
+- **LLM:** `VendorLLMService`
+- **TTS:**
+  - Websocket: `VendorTTSService`
+  - HTTP: `VendorHttpTTSService`
+- **Image:** `VendorImageGenService`
+- **Vision:** `VendorVisionService`
+- **Telephony:** `VendorFrameSerializer`
+
+### Metrics Support
+
+Enable metrics in your service:
+
+```python
+def can_generate_metrics(self) -> bool:
+    """Check if this service can generate processing metrics.
+
+    Returns:
+        True, as this service supports metrics.
+    """
+    return True
+```
+
+### Dynamic Settings Updates
+
+STT, LLM, and TTS services support `ServiceUpdateSettingsFrame` for dynamic configuration changes. The base STTService has an `_update_settings()` method that handles settings, and the private `_settings` `Dict` is used to store settings and provide access to the subclass.
+
+```python
+async def set_language(self, language: Language):
+    """Set the recognition language and reconnect.
+
+    Args:
+        language: The language to use for speech recognition.
+    """
+    logger.info(f"Switching STT language to: [{language}]")
+    self._settings["language"] = language
+    await self._disconnect()
+    await self._connect()
+```
+
+Note that, in this example, Deepgram requires the websocket connection be disconnected and reconnected to reinitialize the service with the new value. Consider if your service requires reconnection.
+
+### Sample Rate Handling
+
+Sample rates are set via PipelineParams and passed to each frame processor at initialization. The pattern is to _not_ set the sample rate value in the constructor of a given service. Instead, use the `start()` method to initialize sample rates from the frame:
+
+```python
+async def start(self, frame: StartFrame):
+    """Start the service."""
+    await super().start(frame)
+    self._settings["output_format"]["sample_rate"] = self.sample_rate
+    await self._connect()
+```
+
+Note that `self.sample_rate` is a `@property` set in the TTSService base class, which provides access to the private sample rate value obtained from the StartFrame.
+
+### Tracing Decorators
+
+Use Pipecat's tracing decorators:
+
+- **STT:** `@traced_stt` - decorate a function that handles `transcript`, `is_final`, `language` as args
+- **LLM:** `@traced_llm` - decorate the `_process_context()` method
+- **TTS:** `@traced_tts` - decorate the `run_tts()` method
+
+## Best Practices
+
+### Packaging and Distribution
+
+- Use [uv](https://docs.astral.sh/uv/) for packaging (encouraged)
+- Consider releasing to PyPI for easier installation
+- Follow semantic versioning principles
+- Maintain a changelog
+
+### HTTP Communication
+
+For REST-based communication, use aiohttp. Pipecat includes this as a required dependency, so using it prevents adding an additional dependency to your integration.
+
+### Error Handling
+
+- Wrap API calls in appropriate try/catch blocks
+- Handle rate limits and network failures gracefully
+- Provide meaningful error messages
+- When errors occur, raise exceptions AND push `ErrorFrame`s to notify the pipeline:
+
+```python
+from pipecat.frames.frames import ErrorFrame
+
+try:
+    # Your API call
+    result = await self._make_api_call()
+except Exception as e:
+    # Push error frame to pipeline
+    await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+    # Raise or handle as appropriate
+    raise
+```
+
+### Testing
+
+- Your foundational example serves as a valuable integration-level test
+- Unit tests are nice to have. As the Pipecat teams provides better guidance, we will encourage unit testing more
+
+## Disclaimer
+
+Community integrations are community-maintained and not officially supported by the Pipecat team. Users should evaluate these integrations independently. The Pipecat team reserves the right to remove listings that become unmaintained or problematic.
+
+## Staying Up to Date
+
+Pipecat evolves rapidly to support the latest AI technologies and patterns. While we strive to minimize breaking changes, they do occur as the framework matures.
+
+**We strongly recommend:**
+
+- Join our Discord at https://discord.gg/pipecat and monitor the `#announcements` channel for release notifications
+- Follow our changelog: https://github.com/pipecat-ai/pipecat/blob/main/CHANGELOG.md
+- Test your integration against new Pipecat releases promptly
+- Update your README with the last tested Pipecat version
+
+This helps ensure your integration remains compatible and your users have clear expectations about version support.
+
+## Questions?
+
+Join our Discord community at https://discord.gg/pipecat and post in the `#community-integrations` channel for guidance and support.
+
+For additional questions, you can also reach out to us at pipecat-ai@daily.co.
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,5 +1,9 @@
 ## Contributing to Pipecat

+**Want to add a new service integration?**
+We encourage community-maintained integrations! Please see our [Community Integration Guide](COMMUNITY_INTEGRATIONS.md) for the process and requirements.
+
+**Want to contribute to Pipecat core?**
 We welcome contributions of all kinds! Your help is appreciated. Follow these steps to get involved:

 1. **Fork this repository**: Start by forking the Pipecat Documentation repository to your GitHub account.
--- a/README.md
+++ b/README.md
@@ -3,6 +3,7 @@
 </div></h1>

 [![PyPI](https://img.shields.io/pypi/v/pipecat-ai)](https://pypi.org/project/pipecat-ai) ![Tests](https://github.com/pipecat-ai/pipecat/actions/workflows/tests.yaml/badge.svg) [![codecov](https://codecov.io/gh/pipecat-ai/pipecat/graph/badge.svg?token=LNVUIVO4Y9)](https://codecov.io/gh/pipecat-ai/pipecat) [![Docs](https://img.shields.io/badge/Documentation-blue)](https://docs.pipecat.ai) [![Discord](https://img.shields.io/discord/1239284677165056021)](https://discord.gg/pipecat) [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/pipecat-ai/pipecat)
+[![](https://getmanta.ai/api/badges?text=Manta%20Graph&link=manta)](https://getmanta.ai/pipecat)

 # 🎙️ Pipecat: Real-Time Voice & Multimodal AI Agents

@@ -19,10 +20,6 @@
 - **Business Agents** – customer intake, support bots, guided flows
 - **Complex Dialog Systems** – design logic with structured conversations

-🧭 Looking to build structured conversations? Check out [Pipecat Flows](https://github.com/pipecat-ai/pipecat-flows) for managing complex conversational states and transitions.
-
-🔍 Looking for help debugging your pipeline and processors? Check out [Whisker](https://github.com/pipecat-ai/whisker), a real-time Pipecat debugger.
-
 ## 🧠 Why Pipecat?

 - **Voice-first**: Integrates speech recognition, text-to-speech, and conversation handling
@@ -30,40 +27,38 @@
 - **Composable Pipelines**: Build complex behavior from modular components
 - **Real-Time**: Ultra-low latency interaction with different transports (e.g. WebSockets or WebRTC)

-## 📱 Client SDKs
+## 🌐 Pipecat Ecosystem

-You can connect to Pipecat from any platform using our official SDKs:
+### 📱 Client SDKs

-<table>
-  <tr>
-    <td>
-      <img src="https://cdn.jsdelivr.net/gh/devicons/devicon/icons/javascript/javascript-original.svg" width="40" height="40" alt="JavaScript"/>
-      <a href="https://docs.pipecat.ai/client/js/introduction">JavaScript</a>
-    </td>
-    <td>
-      <img src="https://cdn.jsdelivr.net/gh/devicons/devicon/icons/react/react-original.svg" width="40" height="40" alt="React"/>
-      <a href="https://docs.pipecat.ai/client/react/introduction">React</a>
-    </td>
-    <td>
-      <img src="https://cdn.jsdelivr.net/gh/devicons/devicon/icons/react/react-original.svg" width="40" height="40" alt="React Native"/>
-      <a href="https://docs.pipecat.ai/client/react-native/introduction">React Native</a>
-    </td>
-  </tr>
-  <tr>
-    <td>
-      <img src="https://cdn.jsdelivr.net/gh/devicons/devicon/icons/swift/swift-original.svg" width="40" height="40" alt="Swift"/>
-      <a href="https://docs.pipecat.ai/client/ios/introduction">Swift</a>
-    </td>
-    <td>
-      <img src="https://cdn.jsdelivr.net/gh/devicons/devicon/icons/kotlin/kotlin-original.svg" width="40" height="40" alt="Kotlin"/>
-      <a href="https://docs.pipecat.ai/client/android/introduction">Kotlin</a>
-    </td>
-    <td>
-      <img src="https://cdn.jsdelivr.net/gh/devicons/devicon/icons/cplusplus/cplusplus-original.svg" width="40" height="40" alt="JavaScript"/>
-      <a href="https://docs.pipecat.ai/client/c++/introduction">C++</a>
-    </td>
-  </tr>
-</table>
+Building client applications? You can connect to Pipecat from any platform using our official SDKs:
+
+<a href="https://docs.pipecat.ai/client/js/introduction">JavaScript</a> | <a href="https://docs.pipecat.ai/client/react/introduction">React</a> | <a href="https://docs.pipecat.ai/client/react-native/introduction">React Native</a> |
+<a href="https://docs.pipecat.ai/client/ios/introduction">Swift</a> | <a href="https://docs.pipecat.ai/client/android/introduction">Kotlin</a> | <a href="https://docs.pipecat.ai/client/c++/introduction">C++</a> | <a href="https://github.com/pipecat-ai/pipecat-esp32">ESP32</a>
+
+### 🧭 Structured conversations
+
+Looking to build structured conversations? Check out [Pipecat Flows](https://github.com/pipecat-ai/pipecat-flows) for managing complex conversational states and transitions.
+
+### 🪄 Beautiful UIs
+
+Want to build beautiful and engaging experiences? Checkout the [Voice UI Kit](https://github.com/pipecat-ai/voice-ui-kit), a collection of components, hooks and templates for building voice AI applications quickly.
+
+### 🛠️ Create and deploy projects
+
+Create a new project in under a minute with the [Pipecat CLI](https://github.com/pipecat-ai/pipecat-cli). Then use the CLI to monitor and deploy your agent to production.
+
+### 🔍 Debugging
+
+Looking for help debugging your pipeline and processors? Check out [Whisker](https://github.com/pipecat-ai/whisker), a real-time Pipecat debugger.
+
+### 🖥️ Terminal
+
+Love terminal applications? Check out [Tail](https://github.com/pipecat-ai/tail), a terminal dashboard for Pipecat.
+
+### 📺️ Pipecat TV Channel
+
+Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.youtube.com/playlist?list=PLzU2zoMTQIHjqC3v4q2XVSR3hGSzwKFwH) channel.

 ## 🎬 See it in action

@@ -72,24 +67,24 @@ You can connect to Pipecat from any platform using our official SDKs:
    <a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/storytelling-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/storytelling-chatbot/image.png" width="400" /></a>
    <br/>
    <a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/translation-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/translation-chatbot/image.png" width="400" /></a>&nbsp;
-    <a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/moondream-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/moondream-chatbot/image.png" width="400" /></a>
+    <a href="https://github.com/pipecat-ai/pipecat/blob/main/examples/foundational/12-describe-video.py"><img src="https://github.com/pipecat-ai/pipecat/blob/main/examples/foundational/assets/moondream.png" width="400" /></a>
 </p>

 ## 🧩 Available services

-| Category            | Services                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              |
-| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| Speech-to-Text      | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper)                                                                                                                    |
-| LLMs                | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova) [Together AI](https://docs.pipecat.ai/server/services/llm/together)                                                                                          |
-| Text-to-Speech      | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
-| Speech-to-Speech    | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
-| Transport           | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |
-| Serializers         | [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     |
-| Video               | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |
-| Memory              | [mem0](https://docs.pipecat.ai/server/services/memory/mem0)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           |
-| Vision & Image      | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             |
-| Audio Processing    | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [ai-coustics](https://docs.pipecat.ai/server/utilities/audio/aic-filter)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
-| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           |
+| Category            | Services                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  |
+| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| Speech-to-Text      | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper)                                                                                                                                                                                                                                                        |
+| LLMs                | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova) [Together AI](https://docs.pipecat.ai/server/services/llm/together)                                                                                                                                                                                                                              |
+| Text-to-Speech      | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
+| Speech-to-Speech    | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
+| Transport           | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
+| Serializers         | [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         |
+| Video               | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
+| Memory              | [mem0](https://docs.pipecat.ai/server/services/memory/mem0)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               |
+| Vision & Image      | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 |
+| Audio Processing    | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [ai-coustics](https://docs.pipecat.ai/server/utilities/audio/aic-filter)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
+| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               |

 📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)

@@ -184,54 +179,6 @@ Run a specific test suite:
 uv run pytest tests/test_name.py
 ```

-### Setting up your editor
-
-This project uses strict [PEP 8](https://peps.python.org/pep-0008/) formatting via [Ruff](https://github.com/astral-sh/ruff).
-
-#### Emacs
-
-You can use [use-package](https://github.com/jwiegley/use-package) to install [emacs-lazy-ruff](https://github.com/christophermadsen/emacs-lazy-ruff) package and configure `ruff` arguments:
-
-```elisp
-(use-package lazy-ruff
-  :ensure t
-  :hook ((python-mode . lazy-ruff-mode))
-  :config
-  (setq lazy-ruff-format-command "ruff format")
-  (setq lazy-ruff-check-command "ruff check --select I"))
-```
-
-`ruff` was installed in the `venv` environment described before, so you should be able to use [pyvenv-auto](https://github.com/ryotaro612/pyvenv-auto) to automatically load that environment inside Emacs.
-
-```elisp
-(use-package pyvenv-auto
-  :ensure t
-  :defer t
-  :hook ((python-mode . pyvenv-auto-run)))
-```
-
-#### Visual Studio Code
-
-Install the
-[Ruff](https://marketplace.visualstudio.com/items?itemName=charliermarsh.ruff) extension. Then edit the user settings (_Ctrl-Shift-P_ `Open User Settings (JSON)`) and set it as the default Python formatter, and enable formatting on save:
-
-```json
-"[python]": {
-    "editor.defaultFormatter": "charliermarsh.ruff",
-    "editor.formatOnSave": true
-}
-```
-
-#### PyCharm
-
-`ruff` was installed in the `venv` environment described before, now to enable autoformatting on save, go to `File` -> `Settings` -> `Tools` -> `File Watchers` and add a new watcher with the following settings:
-
-1. **Name**: `Ruff formatter`
-2. **File type**: `Python`
-3. **Working directory**: `$ContentRoot$`
-4. **Arguments**: `format $FilePath$`
-5. **Program**: `$PyInterpreterDirectory$/ruff`
-
 ## 🤝 Contributing

 We welcome contributions from the community! Whether you're fixing bugs, improving documentation, or adding new features, here's how you can help:
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -0,0 +1,5 @@
+# Security Policy
+
+## Reporting a Vulnerability
+
+Please email `disclosures@daily.co`.
--- a/docs/api/conf.py
+++ b/docs/api/conf.py
@@ -50,6 +50,7 @@ autodoc_mock_imports = [
    # Krisp - has build issues on some platforms
    "pipecat_ai_krisp",
    "krisp",
+    "krisp_audio",
    # System-specific GUI libraries
    "_tkinter",
    "tkinter",
--- a/env.example
+++ b/env.example
@@ -4,6 +4,9 @@ AICOUSTICS_LICENSE_KEY=...
 # Anthropic
 ANTHROPIC_API_KEY=...

+# Assembly AI
+ASSEMBLYAI_API_KEY=...
+
 # Async
 ASYNCAI_API_KEY=...
 ASYNCAI_VOICE_ID=...
@@ -21,12 +24,19 @@ AZURE_CHATGPT_API_KEY=...
 AZURE_CHATGPT_ENDPOINT=https://...
 AZURE_CHATGPT_MODEL=...

+AZURE_REALTIME_API_KEY=...
+AZURE_REALTIME_BASE_URL=...
+
 AZURE_DALLE_API_KEY=...
 AZURE_DALLE_ENDPOINT=https://...
 AZURE_DALLE_MODEL=...

 # Cartesia
 CARTESIA_API_KEY=...
+CARTESIA_VOICE_ID=...
+
+# Cerebras
+CEREBRAS_API_KEY=...

 # Daily
 DAILY_API_KEY=...
@@ -35,39 +45,75 @@ DAILY_SAMPLE_ROOM_URL=https://...
 # Deepgram
 DEEPGRAM_API_KEY=...

+# DeepSeek
+DEEPSEEK_API_KEY=...
+
 # ElevenLabs
 ELEVENLABS_API_KEY=...
 ELEVENLABS_VOICE_ID=...

-# Neuphonic
-NEUPHONIC_API_KEY=...
-
 # Fal
 FAL_KEY=...

 # Fireworks
 FIREWORKS_API_KEY=...

+# Fish Audio
+FISH_API_KEY=...
+
 # Gladia
 GLADIA_API_KEY=...
 GLADIA_REGION=...

 # Google
 GOOGLE_API_KEY=...
-GOOGLE_CLOUD_PROJECT_ID=...
-GOOGLE_TEST_CREDENTIALS=...
 GOOGLE_VERTEX_TEST_CREDENTIALS=...
+GOOGLE_CLOUD_PROJECT_ID=...
+GOOGLE_CLOUD_LOCATION=...
+GOOGLE_TEST_CREDENTIALS=...
+
+# Grok
+GROK_API_KEY=...
+
+# Groq
+GROQ_API_KEY=...
+
+# Heygen
+HEYGEN_API_KEY=...
+
+# Hume
+HUME_API_KEY=...
+HUME_VOICE_ID=...
+
+# Inworld
+INWORLD_API_KEY=...
+
+# Krisp
+KRISP_MODEL_PATH=...
+
+# Krisp Viva
+KRISP_VIVA_MODEL_PATH=...
+
+# LiveKit
+LIVEKIT_API_KEY=...
+LIVEKIT_API_SECRET=...

 # LMNT
 LMNT_API_KEY=...
 LMNT_VOICE_ID=...

-# Perplexity
-PERPLEXITY_API_KEY=...
+# MiniMax
+MINIMAX_API_KEY=...
+MINIMAX_GROUP_ID=...

-# PlayHT
-PLAYHT_USER_ID=...
-PLAYHT_API_KEY=...
+# Mistral
+MISTRAL_API_KEY=...
+
+# Neuphonic
+NEUPHONIC_API_KEY=...
+
+# NVIDIA
+NVIDIA_API_KEY=...

 # OpenAI
 OPENAI_API_KEY=...
@@ -75,83 +121,73 @@ OPENAI_API_KEY=...
 # OpenPipe
 OPENPIPE_API_KEY=...

-# Tavus
-TAVUS_API_KEY=...
-TAVUS_REPLICA_ID=...
-TAVUS_PERSONA_ID=...
+# OpenRouter
+OPENROUTER_API_KEY=...
+
+# Perplexity
+PERPLEXITY_API_KEY=...
+
+# Picovoice Koala
+KOALA_ACCESS_KEY=...
+
+# Piper
+PIPER_BASE_URL=...
+
+# PlayHT
+PLAYHT_USER_ID=...
+PLAYHT_API_KEY=...
+
+# Plivo
+PLIVO_AUTH_ID=...
+PLIVO_AUTH_TOKEN=...
+
+# Qwen
+QWEN_API_KEY=...
+
+# Rime
+RIME_API_KEY=...
+RIME_VOICE_ID=...
+
+# SambaNova
+SAMBANOVA_API_KEY=...
+
+# Sarvam AI
+SARVAM_API_KEY=...
+
+# Sentry
+SENTRY_DSN=...

 # Simli
 SIMLI_API_KEY=...
 SIMLI_FACE_ID=...

-# Krisp
-KRISP_MODEL_PATH=...
-
-# DeepSeek
-DEEPSEEK_API_KEY=...
-
-# Groq
-GROQ_API_KEY=...
-
-# Grok
-GROK_API_KEY=...
-
-# Inworld
-INWORLD_API_KEY=...
-
-# Together.ai
-TOGETHER_API_KEY=...
-
-# Cerebras
-CEREBRAS_API_KEY=...
-
-# Fish Audio
-FISH_API_KEY=...
-
-# Assembly AI
-ASSEMBLYAI_API_KEY=...
-
-# OpenRouter
-OPENROUTER_API_KEY=...
-
-# Piper
-PIPER_BASE_URL=...
-
 # Smart turn
 LOCAL_SMART_TURN_MODEL_PATH=...
 FAL_SMART_TURN_API_KEY=...

+# Soniox
+SONIOX_API_KEY=...
+
+# Speechmatics
+SPEECHMATICS_API_KEY=...
+
+# Tavus
+TAVUS_API_KEY=...
+TAVUS_REPLICA_ID=...
+
+# Telnyx
+TELNYX_API_KEY=...
+TELNYX_ACCOUNT_SID=...
+
+# Together.ai
+TOGETHER_API_KEY=...
+
 # Twilio
 TWILIO_ACCOUNT_SID=...
 TWILIO_AUTH_TOKEN=...

-# MiniMax
-MINIMAX_API_KEY=...
-MINIMAX_GROUP_ID=...
-
-# Sarvam AI
-SARVAM_API_KEY=...
-
-# Soniox
-SONIOX_API_KEY=
-
-# Speechmatics
-SPEECHMATICS_API_KEY=...
-
-# SambaNova
-SAMBANOVA_API_KEY=...
-
-# Sentry
-SENTRY_DSN=...
-
-# Heygen
-HEYGEN_API_KEY=...
-
-# Mistral
-MISTRAL_API_KEY=...
-
-# NVIDIA
-NVIDIA_API_KEY=...
-
-# Qwen
-QWEN_API_KEY=...
+# WhatsApp
+WHATSAPP_TOKEN=...
+WHATSAPP_WEBHOOK_VERIFICATION_TOKEN=...
+WHATSAPP_PHONE_NUMBER_ID=...
+WHATSAPP_APP_SECRET=...
--- a/examples/foundational/04a-transports-daily.py
+++ b/examples/foundational/04a-transports-daily.py
@@ -25,7 +25,7 @@ from pipecat.processors.aggregators.llm_response_universal import LLMContextAggr
 from pipecat.runner.daily import configure
 from pipecat.services.cartesia.tts import CartesiaTTSService
 from pipecat.services.openai.llm import OpenAILLMService
-from pipecat.transports.daily.transport import DailyLogLevel, DailyParams, DailyTransport
+from pipecat.transports.daily.transport import DailyParams, DailyTransport

 load_dotenv(override=True)

@@ -49,7 +49,6 @@ async def main():
                turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
            ),
        )
-        transport.set_log_level(DailyLogLevel.Info)

        tts = CartesiaTTSService(
            api_key=os.getenv("CARTESIA_API_KEY"),
--- a/examples/foundational/07-interruptible-cartesia-http.py
+++ b/examples/foundational/07-interruptible-cartesia-http.py
@@ -21,8 +21,8 @@ from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
+from pipecat.services.cartesia.stt import CartesiaSTTService
 from pipecat.services.cartesia.tts import CartesiaHttpTTSService
-from pipecat.services.deepgram.stt import DeepgramSTTService
 from pipecat.services.openai.llm import OpenAILLMService
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
@@ -59,7 +59,7 @@ transport_params = {
 async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info(f"Starting bot")

-    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+    stt = CartesiaSTTService(api_key=os.getenv("CARTESIA_API_KEY"))

    tts = CartesiaHttpTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
--- a/examples/foundational/07a-interruptible-speechmatics-vad.py
+++ b/examples/foundational/07a-interruptible-speechmatics-vad.py
@@ -6,6 +6,7 @@

 import os

+import aiohttp
 from dotenv import load_dotenv
 from loguru import logger

@@ -20,10 +21,10 @@ from pipecat.processors.aggregators.llm_response import (
 from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
 from pipecat.services.openai.base_llm import BaseOpenAILLMService
 from pipecat.services.openai.llm import OpenAILLMService
 from pipecat.services.speechmatics.stt import SpeechmaticsSTTService
+from pipecat.services.speechmatics.tts import SpeechmaticsTTSService
 from pipecat.transcriptions.language import Language
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
@@ -51,121 +52,127 @@ transport_params = {


 async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
-    """Speechmatics STT Service Example
+    """Speechmatics STT and TTS Service Example

-    This example demonstrates using Speechmatics Speech-to-Text service with speaker diarization and intelligent speaker management. Key features:
+    This example demonstrates using Speechmatics Speech-to-Text and Text-to-Speech services
+    with speaker diarization and intelligent speaker management. Key features:

-    1. Speaker Diarization
+    1. Speaker Diarization (STT)
       - Automatically identifies and distinguishes between different speakers
       - First speaker is identified as 'S1', others get subsequent IDs
       - Uses `enable_diarization` parameter to manage speaker detection

-    2. Smart Speaker Control
+    2. Smart Speaker Control (STT)
       - `focus_speakers` parameter lets you target specific speakers (e.g. ["S1"])
       - Other speakers will be wrapped in PASSIVE tags
       - Only processes speech from focused speakers
       - Words from all speakers are wrapped with XML tags for clear speaker identification
       - Other speakers' speech only sent when focused speaker is active

-    3. Voice Activity Detection
+    3. Voice Activity Detection (STT)
       - Built-in VAD using `enable_vad` parameter
       - Remove `vad_analyzer` from `transport` config to use module's VAD
       - Emits speaker started/stopped events

-    4. Configuration Options
+    4. Text-to-Speech (TTS)
+       - Low latency streaming audio synthesis
+       - Multiple voice options available including `sarah`, `theo`, and `megan`
+
+    5. Configuration Options
       - `operating_point` parameter defaults to `ENHANCED` for optimal accuracy
       - Configurable `end_of_utterance_silence_trigger` (default 0.5s)
       - Customizable speaker formatting
       - Additional diarization settings available

-    For detailed information about operating points and configuration:
-    https://docs.speechmatics.com/rt-api-ref
+    For detailed information:
+    - STT: https://docs.speechmatics.com/rt-api-ref
+    - TTS: https://docs.speechmatics.com/text-to-speech/quickstart
    """

    logger.info(f"Starting bot")
-
-    stt = SpeechmaticsSTTService(
-        api_key=os.getenv("SPEECHMATICS_API_KEY"),
-        params=SpeechmaticsSTTService.InputParams(
-            language=Language.EN,
-            enable_vad=True,
-            enable_diarization=True,
-            focus_speakers=["S1"],
-            end_of_utterance_silence_trigger=0.5,
-            speaker_active_format="<{speaker_id}>{text}</{speaker_id}>",
-            speaker_passive_format="<PASSIVE><{speaker_id}>{text}</{speaker_id}></PASSIVE>",
-        ),
-    )
-
-    tts = ElevenLabsTTSService(
-        api_key=os.getenv("ELEVENLABS_API_KEY"),
-        voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
-        model="eleven_turbo_v2_5",
-    )
-
-    llm = OpenAILLMService(
-        api_key=os.getenv("OPENAI_API_KEY"),
-        params=BaseOpenAILLMService.InputParams(temperature=0.75),
-    )
-
-    messages = [
-        {
-            "role": "system",
-            "content": (
-                "You are a helpful British assistant called Alfred. "
-                "Your goal is to demonstrate your capabilities in a succinct way. "
-                "Your output will be converted to audio so don't include special characters in your answers. "
-                "Always include punctuation in your responses. "
-                "Give very short replies - do not give longer replies unless strictly necessary. "
-                "Respond to what the user said in a concise, funny, creative and helpful way. "
-                "Use `<Sn/>` tags to identify different speakers - do not use tags in your replies. "
-                "Do not respond to speakers within `<PASSIVE/>` tags unless explicitly asked to. "
+    async with aiohttp.ClientSession() as session:
+        stt = SpeechmaticsSTTService(
+            api_key=os.getenv("SPEECHMATICS_API_KEY"),
+            params=SpeechmaticsSTTService.InputParams(
+                language=Language.EN,
+                enable_vad=True,
+                enable_diarization=True,
+                focus_speakers=["S1"],
+                end_of_utterance_silence_trigger=0.5,
+                speaker_active_format="<{speaker_id}>{text}</{speaker_id}>",
+                speaker_passive_format="<PASSIVE><{speaker_id}>{text}</{speaker_id}></PASSIVE>",
            ),
-        },
-    ]
+        )

-    context = LLMContext(messages)
-    context_aggregator = LLMContextAggregatorPair(
-        context,
-        user_params=LLMUserAggregatorParams(aggregation_timeout=0.005),
-    )
+        tts = SpeechmaticsTTSService(
+            api_key=os.getenv("SPEECHMATICS_API_KEY"),
+            voice_id="sarah",
+            aiohttp_session=session,
+        )

-    pipeline = Pipeline(
-        [
-            transport.input(),  # Transport user input
-            stt,
-            context_aggregator.user(),  # User responses
-            llm,  # LLM
-            tts,  # TTS
-            transport.output(),  # Transport bot output
-            context_aggregator.assistant(),  # Assistant spoken responses
+        llm = OpenAILLMService(
+            api_key=os.getenv("OPENAI_API_KEY"),
+            params=BaseOpenAILLMService.InputParams(temperature=0.75),
+        )
+
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    "You are a helpful British assistant called Sarah. "
+                    "Your goal is to demonstrate your capabilities in a succinct way. "
+                    "Your output will be converted to audio so don't include special characters in your answers. "
+                    "Always include punctuation in your responses. "
+                    "Give very short replies - do not give longer replies unless strictly necessary. "
+                    "Respond to what the user said in a concise, funny, creative and helpful way. "
+                    "Use `<Sn/>` tags to identify different speakers - do not use tags in your replies. "
+                    "Do not respond to speakers within `<PASSIVE/>` tags unless explicitly asked to. "
+                ),
+            },
        ]
-    )

-    task = PipelineTask(
-        pipeline,
-        params=PipelineParams(
-            enable_metrics=True,
-            enable_usage_metrics=True,
-        ),
-        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
-    )
+        context = LLMContext(messages)
+        context_aggregator = LLMContextAggregatorPair(
+            context,
+            user_params=LLMUserAggregatorParams(aggregation_timeout=0.005),
+        )

-    @transport.event_handler("on_client_connected")
-    async def on_client_connected(transport, client):
-        logger.info(f"Client connected")
-        # Kick off the conversation.
-        messages.append({"role": "system", "content": "Say a short hello to the user."})
-        await task.queue_frames([LLMRunFrame()])
+        pipeline = Pipeline(
+            [
+                transport.input(),  # Transport user input
+                stt,
+                context_aggregator.user(),  # User responses
+                llm,  # LLM
+                tts,  # TTS
+                transport.output(),  # Transport bot output
+                context_aggregator.assistant(),  # Assistant spoken responses
+            ]
+        )

-    @transport.event_handler("on_client_disconnected")
-    async def on_client_disconnected(transport, client):
-        logger.info(f"Client disconnected")
-        await task.cancel()
+        task = PipelineTask(
+            pipeline,
+            params=PipelineParams(
+                enable_metrics=True,
+                enable_usage_metrics=True,
+            ),
+            idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+        )

-    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+        @transport.event_handler("on_client_connected")
+        async def on_client_connected(transport, client):
+            logger.info(f"Client connected")
+            # Kick off the conversation.
+            messages.append({"role": "system", "content": "Say a short hello to the user."})
+            await task.queue_frames([LLMRunFrame()])

-    await runner.run(task)
+        @transport.event_handler("on_client_disconnected")
+        async def on_client_disconnected(transport, client):
+            logger.info(f"Client disconnected")
+            await task.cancel()
+
+        runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+        await runner.run(task)


 async def bot(runner_args: RunnerArguments):
--- a/examples/foundational/07a-interruptible-speechmatics.py
+++ b/examples/foundational/07a-interruptible-speechmatics.py
@@ -6,6 +6,7 @@

 import os

+import aiohttp
 from dotenv import load_dotenv
 from loguru import logger

@@ -24,10 +25,10 @@ from pipecat.processors.aggregators.llm_response import (
 from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
 from pipecat.services.openai.base_llm import BaseOpenAILLMService
 from pipecat.services.openai.llm import OpenAILLMService
 from pipecat.services.speechmatics.stt import SpeechmaticsSTTService
+from pipecat.services.speechmatics.tts import SpeechmaticsTTSService
 from pipecat.transcriptions.language import Language
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
@@ -61,100 +62,106 @@ transport_params = {


 async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
-    """Run example using Speechmatics STT.
+    """Run example using Speechmatics STT and TTS.

-    This example will use diarization within our STT service and output the words spoken by
-    each individual speaker and wrap them with XML tags for the LLM to process. Note the
-    instructions in the system context for the LLM. This greatly improves the conversation
-    experience by allowing the LLM to understand who is speaking in a multi-party call.
+    This example demonstrates a complete Speechmatics integration with both Speech-to-Text
+    and Text-to-Speech services:

-    By default, this example will use our ENHANCED operating point, which is optimized for
-    high accuracy. You can change this by setting the `operating_point` parameter to a different
-    value.
+    STT Features:
+    - Diarization to identify and distinguish between different speakers
+    - Words spoken by each speaker are wrapped with XML tags for LLM processing
+    - System context instructions help the LLM understand multi-party conversations
+    - ENHANCED operating point by default for optimal accuracy

-    For more information on operating points, see the Speechmatics documentation:
-    https://docs.speechmatics.com/rt-api-ref
+    TTS Features:
+    - Low latency streaming audio synthesis
+    - Multiple voice options available including `sarah`, `theo`, and `megan`
+
+    For more information:
+    - STT: https://docs.speechmatics.com/rt-api-ref
+    - TTS: https://docs.speechmatics.com/text-to-speech/quickstart
    """
    logger.info(f"Starting bot")

-    stt = SpeechmaticsSTTService(
-        api_key=os.getenv("SPEECHMATICS_API_KEY"),
-        params=SpeechmaticsSTTService.InputParams(
-            language=Language.EN,
-            enable_diarization=True,
-            end_of_utterance_silence_trigger=0.5,
-            speaker_active_format="<{speaker_id}>{text}</{speaker_id}>",
-        ),
-    )
-
-    tts = ElevenLabsTTSService(
-        api_key=os.getenv("ELEVENLABS_API_KEY"),
-        voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
-        model="eleven_turbo_v2_5",
-    )
-
-    llm = OpenAILLMService(
-        api_key=os.getenv("OPENAI_API_KEY"),
-        params=BaseOpenAILLMService.InputParams(temperature=0.75),
-    )
-
-    messages = [
-        {
-            "role": "system",
-            "content": (
-                "You are a helpful British assistant called Alfred. "
-                "Your goal is to demonstrate your capabilities in a succinct way. "
-                "Your output will be converted to audio so don't include special characters in your answers. "
-                "Always include punctuation in your responses. "
-                "Give very short replies - do not give longer replies unless strictly necessary. "
-                "Respond to what the user said in a concise, funny, creative and helpful way. "
-                "Use `<Sn/>` tags to identify different speakers - do not use tags in your replies."
+    async with aiohttp.ClientSession() as session:
+        stt = SpeechmaticsSTTService(
+            api_key=os.getenv("SPEECHMATICS_API_KEY"),
+            params=SpeechmaticsSTTService.InputParams(
+                language=Language.EN,
+                enable_diarization=True,
+                end_of_utterance_silence_trigger=0.5,
+                speaker_active_format="<{speaker_id}>{text}</{speaker_id}>",
            ),
-        },
-    ]
+        )

-    context = LLMContext(messages)
-    context_aggregator = LLMContextAggregatorPair(
-        context,
-        user_params=LLMUserAggregatorParams(aggregation_timeout=0.005),
-    )
+        tts = SpeechmaticsTTSService(
+            api_key=os.getenv("SPEECHMATICS_API_KEY"),
+            voice_id="sarah",
+            aiohttp_session=session,
+        )

-    pipeline = Pipeline(
-        [
-            transport.input(),  # Transport user input
-            stt,  # STT
-            context_aggregator.user(),  # User responses
-            llm,  # LLM
-            tts,  # TTS
-            transport.output(),  # Transport bot output
-            context_aggregator.assistant(),  # Assistant spoken responses
+        llm = OpenAILLMService(
+            api_key=os.getenv("OPENAI_API_KEY"),
+            params=BaseOpenAILLMService.InputParams(temperature=0.75),
+        )
+
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    "You are a helpful British assistant called Sarah. "
+                    "Your goal is to demonstrate your capabilities in a succinct way. "
+                    "Your output will be converted to audio so don't include special characters in your answers. "
+                    "Always include punctuation in your responses. "
+                    "Give very short replies - do not give longer replies unless strictly necessary. "
+                    "Respond to what the user said in a concise, funny, creative and helpful way. "
+                    "Use `<Sn/>` tags to identify different speakers - do not use tags in your replies."
+                ),
+            },
        ]
-    )

-    task = PipelineTask(
-        pipeline,
-        params=PipelineParams(
-            enable_metrics=True,
-            enable_usage_metrics=True,
-        ),
-        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
-    )
+        context = LLMContext(messages)
+        context_aggregator = LLMContextAggregatorPair(
+            context,
+            user_params=LLMUserAggregatorParams(aggregation_timeout=0.005),
+        )

-    @transport.event_handler("on_client_connected")
-    async def on_client_connected(transport, client):
-        logger.info(f"Client connected")
-        # Kick off the conversation.
-        messages.append({"role": "system", "content": "Say a short hello to the user."})
-        await task.queue_frames([LLMRunFrame()])
+        pipeline = Pipeline(
+            [
+                transport.input(),  # Transport user input
+                stt,  # STT
+                context_aggregator.user(),  # User responses
+                llm,  # LLM
+                tts,  # TTS
+                transport.output(),  # Transport bot output
+                context_aggregator.assistant(),  # Assistant spoken responses
+            ]
+        )

-    @transport.event_handler("on_client_disconnected")
-    async def on_client_disconnected(transport, client):
-        logger.info(f"Client disconnected")
-        await task.cancel()
+        task = PipelineTask(
+            pipeline,
+            params=PipelineParams(
+                enable_metrics=True,
+                enable_usage_metrics=True,
+            ),
+            idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+        )

-    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+        @transport.event_handler("on_client_connected")
+        async def on_client_connected(transport, client):
+            logger.info(f"Client connected")
+            # Kick off the conversation.
+            messages.append({"role": "system", "content": "Say a short hello to the user."})
+            await task.queue_frames([LLMRunFrame()])

-    await runner.run(task)
+        @transport.event_handler("on_client_disconnected")
+        async def on_client_disconnected(transport, client):
+            logger.info(f"Client disconnected")
+            await task.cancel()
+
+        runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+        await runner.run(task)


 async def bot(runner_args: RunnerArguments):
--- a/examples/foundational/07ae-interruptible-hume.py
+++ b/examples/foundational/07ae-interruptible-hume.py
@@ -0,0 +1,138 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import LLMRunFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIObserver, RTVIProcessor
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.deepgram.stt import DeepgramSTTService
+from pipecat.services.hume.tts import HUME_SAMPLE_RATE, HumeTTSService
+from pipecat.services.openai.llm import OpenAILLMService
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+
+    tts = HumeTTSService(
+        api_key=os.getenv("HUME_API_KEY"),
+        # Replace with your Hume voice ID
+        voice_id="f898a92e-685f-43fa-985b-a46920f0650b",
+    )
+
+    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    context = LLMContext(messages)
+    context_aggregator = LLMContextAggregatorPair(context)
+
+    rtvi = RTVIProcessor(config=RTVIConfig(config=[]))
+
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            rtvi,
+            stt,
+            context_aggregator.user(),  # User responses
+            llm,  # LLM
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+            context_aggregator.assistant(),  # Assistant spoken responses
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+            audio_out_sample_rate=HUME_SAMPLE_RATE,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+        observers=[RTVIObserver(rtvi)],
+    )
+
+    @rtvi.event_handler("on_client_ready")
+    async def on_client_ready(rtvi):
+        await rtvi.set_bot_ready()
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+        # Kick off the conversation.
+        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+        await task.queue_frames([LLMRunFrame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/07c-interruptible-deepgram-flux.py
+++ b/examples/foundational/07c-interruptible-deepgram-flux.py
@@ -0,0 +1,122 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.frames.frames import LLMRunFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_response_universal import (
+    LLMContext,
+    LLMContextAggregatorPair,
+)
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.deepgram.flux.stt import DeepgramFluxSTTService
+from pipecat.services.deepgram.tts import DeepgramTTSService
+from pipecat.services.openai.llm import OpenAILLMService
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    stt = DeepgramFluxSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+
+    tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY"), voice="aura-2-andromeda-en")
+
+    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    context = LLMContext(messages)
+    context_aggregator = LLMContextAggregatorPair(context)
+
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            stt,  # STT
+            context_aggregator.user(),  # User responses
+            llm,  # LLM
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+            context_aggregator.assistant(),  # Assistant spoken responses
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+        # Kick off the conversation.
+        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+        await task.queue_frames([LLMRunFrame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    @stt.event_handler("on_update")
+    async def on_deepgram_flux_update(stt, transcript):
+        logger.debug(f"On deeggram flux update: {transcript}")
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/07c-interruptible-deepgram-http.py
+++ b/examples/foundational/07c-interruptible-deepgram-http.py
@@ -0,0 +1,132 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+
+import os
+
+import aiohttp
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import LLMRunFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.deepgram.stt import DeepgramSTTService
+from pipecat.services.deepgram.tts import DeepgramHttpTTSService
+from pipecat.services.openai.llm import OpenAILLMService
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    async with aiohttp.ClientSession() as session:
+        stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+
+        tts = DeepgramHttpTTSService(
+            api_key=os.getenv("DEEPGRAM_API_KEY"),
+            voice="aura-2-andromeda-en",
+            aiohttp_session=session,
+        )
+
+        llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
+
+        messages = [
+            {
+                "role": "system",
+                "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
+            },
+        ]
+
+        context = LLMContext(messages)
+        context_aggregator = LLMContextAggregatorPair(context)
+
+        pipeline = Pipeline(
+            [
+                transport.input(),  # Transport user input
+                stt,  # STT
+                context_aggregator.user(),  # User responses
+                llm,  # LLM
+                tts,  # TTS
+                transport.output(),  # Transport bot output
+                context_aggregator.assistant(),  # Assistant spoken responses
+            ]
+        )
+
+        task = PipelineTask(
+            pipeline,
+            params=PipelineParams(
+                enable_metrics=True,
+                enable_usage_metrics=True,
+            ),
+            idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+        )
+
+        @transport.event_handler("on_client_connected")
+        async def on_client_connected(transport, client):
+            logger.info(f"Client connected")
+            # Kick off the conversation.
+            messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+            await task.queue_frames([LLMRunFrame()])
+
+        @transport.event_handler("on_client_disconnected")
+        async def on_client_disconnected(transport, client):
+            logger.info(f"Client disconnected")
+            await task.cancel()
+
+        runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+        await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/07d-interruptible-elevenlabs-http.py
+++ b/examples/foundational/07d-interruptible-elevenlabs-http.py
@@ -23,7 +23,6 @@ from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.deepgram.stt import DeepgramSTTService
 from pipecat.services.elevenlabs.stt import ElevenLabsSTTService
 from pipecat.services.elevenlabs.tts import ElevenLabsHttpTTSService
 from pipecat.services.openai.llm import OpenAILLMService
--- a/examples/foundational/07m-interruptible-aws.py
+++ b/examples/foundational/07m-interruptible-aws.py
@@ -67,8 +67,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    llm = AWSBedrockLLMService(
        aws_region="us-west-2",
-        model="us.anthropic.claude-3-5-haiku-20241022-v1:0",
-        params=AWSBedrockLLMService.InputParams(temperature=0.8, latency="optimized"),
+        model="us.anthropic.claude-haiku-4-5-20251001-v1:0",
+        params=AWSBedrockLLMService.InputParams(temperature=0.8),
    )

    messages = [
--- a/examples/foundational/07n-interruptible-gemini-image.py
+++ b/examples/foundational/07n-interruptible-gemini-image.py
@@ -0,0 +1,151 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+"""
+A conversational AI bot using Gemini for both LLM, STT and TTS.
+
+This example demonstrates how to use Gemini's image generation capabilities.
+
+Features showcased:
+- Gemini LLM for conversation and image generation
+- Google TTS and STT
+
+Run with:
+    python examples/foundational/07n-interruptible-gemini-image.py
+
+Make sure to set your environment variables:
+    export GOOGLE_API_KEY=your_api_key_here
+"""
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import LLMRunFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.google.llm import GoogleLLMService
+from pipecat.services.google.stt import GoogleSTTService
+from pipecat.services.google.tts import GoogleTTSService
+from pipecat.transcriptions.language import Language
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        video_out_enabled=True,
+        video_out_width=1024,
+        video_out_height=1024,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        video_out_enabled=True,
+        video_out_width=1024,
+        video_out_height=1024,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    stt = GoogleSTTService(
+        params=GoogleSTTService.InputParams(languages=Language.EN_US),
+        credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"),
+    )
+
+    tts = GoogleTTSService(
+        voice_id="en-US-Chirp3-HD-Charon",
+        params=GoogleTTSService.InputParams(language=Language.EN_US),
+        credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"),
+    )
+
+    llm = GoogleLLMService(
+        api_key=os.getenv("GOOGLE_API_KEY"),
+        model="gemini-2.5-flash-image",
+    )
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    context = LLMContext(messages)
+    context_aggregator = LLMContextAggregatorPair(context)
+
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            stt,  # STT
+            context_aggregator.user(),  # User responses
+            llm,  # LLM
+            tts,  # Gemini TTS
+            transport.output(),  # Transport bot output
+            context_aggregator.assistant(),  # Assistant spoken responses
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+        # Kick off the conversation with a styled introduction
+        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+        await task.queue_frames([LLMRunFrame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/07n-interruptible-google.py
+++ b/examples/foundational/07n-interruptible-google.py
@@ -61,8 +61,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info(f"Starting bot")

    stt = GoogleSTTService(
-        params=GoogleSTTService.InputParams(languages=Language.EN_US),
+        params=GoogleSTTService.InputParams(languages=Language.EN_US, model="chirp_3"),
        credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"),
+        location="us",
    )

    tts = GoogleTTSService(
--- a/examples/foundational/07p-interruptible-krisp-viva.py
+++ b/examples/foundational/07p-interruptible-krisp-viva.py
@@ -0,0 +1,129 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.audio.filters.krisp_viva_filter import KrispVivaFilter
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import LLMRunFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.deepgram.stt import DeepgramSTTService
+from pipecat.services.deepgram.tts import DeepgramTTSService
+from pipecat.services.openai.llm import OpenAILLMService
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+        audio_in_filter=KrispVivaFilter(),
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+        audio_in_filter=KrispVivaFilter(),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+        audio_in_filter=KrispVivaFilter(),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+
+    tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY"), voice="aura-helios-en")
+
+    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    context = LLMContext(messages)
+    context_aggregator = LLMContextAggregatorPair(context)
+
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            stt,  # STT
+            context_aggregator.user(),  # User responses
+            llm,  # LLM
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+            context_aggregator.assistant(),  # Assistant spoken responses
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+        # Kick off the conversation.
+        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+        await task.queue_frames([LLMRunFrame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/07z-interruptible-sarvam-http.py
+++ b/examples/foundational/07z-interruptible-sarvam-http.py
@@ -22,8 +22,8 @@ from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.deepgram.stt import DeepgramSTTService
 from pipecat.services.openai.llm import OpenAILLMService
+from pipecat.services.sarvam.stt import SarvamSTTService
 from pipecat.services.sarvam.tts import SarvamHttpTTSService
 from pipecat.transcriptions.language import Language
 from pipecat.transports.base_transport import BaseTransport, TransportParams
@@ -63,7 +63,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    # Create an HTTP session
    async with aiohttp.ClientSession() as session:
-        stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+        stt = SarvamSTTService(
+            api_key=os.getenv("SARVAM_API_KEY"),
+            model="saarika:v2.5",
+        )

        tts = SarvamHttpTTSService(
            api_key=os.getenv("SARVAM_API_KEY"),
--- a/examples/foundational/07z-interruptible-sarvam.py
+++ b/examples/foundational/07z-interruptible-sarvam.py
@@ -24,8 +24,8 @@ from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.deepgram.stt import DeepgramSTTService
 from pipecat.services.openai.llm import OpenAILLMService
+from pipecat.services.sarvam.stt import SarvamSTTService
 from pipecat.services.sarvam.tts import SarvamTTSService
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
@@ -62,7 +62,10 @@ transport_params = {
 async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info(f"Starting bot")

-    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+    stt = SarvamSTTService(
+        api_key=os.getenv("SARVAM_API_KEY"),
+        model="saarika:v2.5",
+    )

    tts = SarvamTTSService(
        api_key=os.getenv("SARVAM_API_KEY"),
--- a/examples/foundational/08-bots-arguing.py
+++ b/examples/foundational/08-bots-arguing.py
@@ -1,147 +0,0 @@
-import asyncio
-import logging
-import os
-from typing import Tuple
-
-import aiohttp
-from dotenv import load_dotenv
-
-from pipecat.frames.frames import AudioFrame, EndFrame, ImageFrame, LLMContextFrame, TextFrame
-from pipecat.pipeline.pipeline import Pipeline
-from pipecat.processors.aggregators import SentenceAggregator
-from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
-from pipecat.runner.daily import configure
-from pipecat.services.azure import AzureLLMService, AzureTTSService
-from pipecat.services.elevenlabs import ElevenLabsTTSService
-from pipecat.services.fal import FalImageGenService
-from pipecat.transports.daily.transport import DailyTransport
-
-load_dotenv(override=True)
-
-logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s")
-logger = logging.getLogger("pipecat")
-logger.setLevel(logging.DEBUG)
-
-
-async def main():
-    async with aiohttp.ClientSession() as session:
-        (room_url, _) = await configure(session)
-
-        transport = DailyTransport(
-            room_url,
-            None,
-            "Respond bot",
-            duration_minutes=10,
-            mic_enabled=True,
-            mic_sample_rate=16000,
-            camera_enabled=True,
-            camera_width=1024,
-            camera_height=1024,
-        )
-
-        llm = AzureLLMService(
-            api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
-            endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
-            model=os.getenv("AZURE_CHATGPT_MODEL"),
-        )
-        tts1 = AzureTTSService(
-            api_key=os.getenv("AZURE_SPEECH_API_KEY"),
-            region=os.getenv("AZURE_SPEECH_REGION"),
-        )
-        tts2 = ElevenLabsTTSService(
-            api_key=os.getenv("ELEVENLABS_API_KEY"),
-            voice_id="jBpfuIE2acCO8z3wKNLl",
-        )
-        dalle = FalImageGenService(
-            params=FalImageGenService.InputParams(image_size="1024x1024"),
-            aiohttp_session=session,
-            key=os.getenv("FAL_KEY"),
-        )
-
-        bot1_messages = [
-            {
-                "role": "system",
-                "content": "You are a stern librarian. You strongly believe that a hot dog is a sandwich. Start by stating this fact in a few sentences, then be prepared to debate this with the user. You shouldn't ever compromise on the fundamental truth that a hot dog is a sandwich. Your responses should only be a few sentences long.",
-            },
-        ]
-        bot2_messages = [
-            {
-                "role": "system",
-                "content": "You are a silly cat, and you strongly believe that a hot dog is not a sandwich. Debate this with the user, only responding with a few sentences. Don't ever accept that a hot dog is a sandwich.",
-            },
-        ]
-
-        async def get_text_and_audio(messages) -> Tuple[str, bytearray]:
-            """This function streams text from the LLM and uses the TTS service to convert
-            that text to speech as it's received.
-            """
-            source_queue = asyncio.Queue()
-            sink_queue = asyncio.Queue()
-            sentence_aggregator = SentenceAggregator()
-            pipeline = Pipeline([llm, sentence_aggregator, tts1], source_queue, sink_queue)
-
-            await source_queue.put(LLMContextFrame(LLMContext(messages)))
-            await source_queue.put(EndFrame())
-            await pipeline.run_pipeline()
-
-            message = ""
-            all_audio = bytearray()
-            while sink_queue.qsize():
-                frame = sink_queue.get_nowait()
-                if isinstance(frame, TextFrame):
-                    message += frame.text
-                elif isinstance(frame, AudioFrame):
-                    all_audio.extend(frame.audio)
-
-            return (message, all_audio)
-
-        async def get_bot1_statement():
-            message, audio = await get_text_and_audio(bot1_messages)
-
-            bot1_messages.append({"role": "assistant", "content": message})
-            bot2_messages.append({"role": "user", "content": message})
-
-            return audio
-
-        async def get_bot2_statement():
-            message, audio = await get_text_and_audio(bot2_messages)
-
-            bot2_messages.append({"role": "assistant", "content": message})
-            bot1_messages.append({"role": "user", "content": message})
-
-            return audio
-
-        async def argue():
-            for i in range(100):
-                print(f"In iteration {i}")
-
-                bot1_description = "A woman conservatively dressed as a librarian in a library surrounded by books, cartoon, serious, highly detailed"
-
-                (audio1, image_data1) = await asyncio.gather(
-                    get_bot1_statement(), dalle.run_image_gen(bot1_description)
-                )
-                await transport.send_queue.put(
-                    [
-                        ImageFrame(image_data1[1], image_data1[2]),
-                        AudioFrame(audio1),
-                    ]
-                )
-
-                bot2_description = "A cat dressed in a hot dog costume, cartoon, bright colors, funny, highly detailed"
-
-                (audio2, image_data2) = await asyncio.gather(
-                    get_bot2_statement(), dalle.run_image_gen(bot2_description)
-                )
-                await transport.send_queue.put(
-                    [
-                        ImageFrame(image_data2[1], image_data2[2]),
-                        AudioFrame(audio2),
-                    ]
-                )
-
-        await asyncio.gather(transport.run(), argue())
-
-
-if __name__ == "__main__":
-    asyncio.run(main())
--- a/examples/foundational/08-custom-frame-processor.py
+++ b/examples/foundational/08-custom-frame-processor.py
@@ -4,8 +4,9 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #

+import io
 import os
-from typing import Optional
+import re

 from dotenv import load_dotenv
 from loguru import logger
@@ -16,24 +17,17 @@ from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.audio.vad.vad_analyzer import VADParams
 from pipecat.frames.frames import (
    Frame,
-    LLMContextFrame,
-    TextFrame,
-    TTSSpeakFrame,
-    UserImageRawFrame,
-    UserImageRequestFrame,
+    LLMRunFrame,
+    MetricsFrame,
 )
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.user_response import UserResponseAggregator
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 from pipecat.runner.types import RunnerArguments
-from pipecat.runner.utils import (
-    create_transport,
-    get_transport_client_id,
-    maybe_capture_participant_camera,
-)
+from pipecat.runner.utils import create_transport
 from pipecat.services.cartesia.tts import CartesiaTTSService
 from pipecat.services.deepgram.stt import DeepgramSTTService
 from pipecat.services.openai.llm import OpenAILLMService
@@ -43,46 +37,41 @@ from pipecat.transports.daily.transport import DailyParams
 load_dotenv(override=True)


-class UserImageRequester(FrameProcessor):
-    """Converts incoming text into requests for user images."""
+def format_metrics(metrics, indent=0):
+    lines = []
+    tab = "\t" * indent

-    def __init__(self, participant_id: Optional[str] = None):
-        super().__init__()
-        self._participant_id = participant_id
+    for metric in metrics:
+        lines.append(tab + type(metric).__name__)
+        for field, value in vars(metric).items():
+            if hasattr(value, "__dict__") and not isinstance(
+                value, (str, int, float, bool, type(None))
+            ):
+                lines.append(f"{tab}\t{field}={type(value).__name__}")
+                for k, v in vars(value).items():
+                    lines.append(f"{tab}\t\t{k}={repr(v)}")
+            else:
+                lines.append(f"{tab}\t{field}={repr(value)}")

-    def set_participant_id(self, participant_id: str):
-        self._participant_id = participant_id
+    return "\n".join(lines)
+
+
+class MetricsFrameLogger(FrameProcessor):
+    """MetricsFrameLogger formats and logs all MetericsFrames"""
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

-        if self._participant_id and isinstance(frame, TextFrame):
-            await self.push_frame(
-                UserImageRequestFrame(self._participant_id, context=frame.text),
-                FrameDirection.UPSTREAM,
-            )
-        else:
+        if isinstance(frame, MetricsFrame):
+            logger.info(f"{frame.name}\n    {format_metrics(frame.data)}")
            await self.push_frame(frame, direction)

-
-class UserImageProcessor(FrameProcessor):
-    """Converts incoming user images into context frames."""
-
-    async def process_frame(self, frame: Frame, direction: FrameDirection):
-        await super().process_frame(frame, direction)
-
-        if isinstance(frame, UserImageRawFrame):
-            if frame.request and frame.request.context:
-                context = LLMContext()
-                context.add_image_frame_message(
-                    image=frame.image,
-                    text=frame.request.context,
-                    size=frame.size,
-                    format=frame.format,
-                )
-                frame = LLMContextFrame(context)
-                await self.push_frame(frame)
+        # ALWAYS push all frames
        else:
+            # SUPER IMPORTANT: always push every frame!
            await self.push_frame(frame, direction)


@@ -93,14 +82,13 @@ transport_params = {
    "daily": lambda: DailyParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
-        video_in_enabled=True,
        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
    ),
    "webrtc": lambda: TransportParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
-        video_in_enabled=True,
+        video_out_enabled=True,
        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
    ),
@@ -110,33 +98,37 @@ transport_params = {
 async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info(f"Starting bot")

-    user_response = UserResponseAggregator()
-
-    # Initialize the image requester without setting the participant ID yet
-    image_requester = UserImageRequester()
-
-    image_processor = UserImageProcessor()
-
    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

-    # OpenAI GPT-4o for vision analysis
-    openai = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
-
    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )

+    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    context = LLMContext(messages)
+    context_aggregator = LLMContextAggregatorPair(context)
+
+    metrics_frame_processor = MetricsFrameLogger()
+
    pipeline = Pipeline(
        [
            transport.input(),
            stt,
-            user_response,
-            image_requester,
-            image_processor,
-            openai,
+            context_aggregator.user(),
+            llm,
            tts,
            transport.output(),
+            context_aggregator.assistant(),
+            metrics_frame_processor,  # pretty print metrics frames
        ]
    )

@@ -152,15 +144,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info(f"Client connected: {client}")
-
-        await maybe_capture_participant_camera(transport, client)
-
-        # Set the participant ID in the image requester
-        client_id = get_transport_client_id(transport, client)
-        image_requester.set_participant_id(client_id)
-
-        # Welcome message
-        await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me about what I see."))
+        # Kick off the conversation.
+        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+        await task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
--- a/examples/foundational/12-describe-image-openai.py
+++ b/examples/foundational/12-describe-image-openai.py
@@ -0,0 +1,141 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+from PIL import Image
+
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import LLMRunFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.cartesia.tts import CartesiaTTSService
+from pipecat.services.deepgram.stt import DeepgramSTTService
+from pipecat.services.openai.llm import OpenAILLMService
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+
+load_dotenv(override=True)
+
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+
+    tts = CartesiaTTSService(
+        api_key=os.getenv("CARTESIA_API_KEY"),
+        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
+    )
+
+    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way. You are also able to describe images.",
+        },
+    ]
+
+    context = LLMContext(messages)
+    context_aggregator = LLMContextAggregatorPair(context)
+
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            stt,  # STT
+            context_aggregator.user(),  # User responses
+            llm,  # LLM
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+            context_aggregator.assistant(),  # Assistant spoken responses
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+
+        if not runner_args.body:
+            script_dir = os.path.dirname(__file__)
+            runner_args.body = {
+                "image_path": os.path.join(script_dir, "assets", "cat.jpg"),
+                "question": "Describe this image",
+            }
+
+        image_path = runner_args.body["image_path"]
+        question = runner_args.body["question"]
+
+        # Kick off the conversation.
+        image = Image.open(image_path)
+        message = LLMContext.create_image_message(
+            image=image.tobytes(),
+            format="RGB",
+            size=image.size,
+            text=question,
+        )
+        messages.append(message)
+        await task.queue_frames([LLMRunFrame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/12-describe-video.py
+++ b/examples/foundational/12-describe-video.py
@@ -1,180 +0,0 @@
-#
-# Copyright (c) 2024–2025, Daily
-#
-# SPDX-License-Identifier: BSD 2-Clause License
-#
-
-import os
-from typing import Optional
-
-from dotenv import load_dotenv
-from loguru import logger
-
-from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
-from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
-from pipecat.audio.vad.silero import SileroVADAnalyzer
-from pipecat.audio.vad.vad_analyzer import VADParams
-from pipecat.frames.frames import (
-    Frame,
-    LLMContextFrame,
-    TextFrame,
-    TTSSpeakFrame,
-    UserImageRawFrame,
-    UserImageRequestFrame,
-)
-from pipecat.pipeline.pipeline import Pipeline
-from pipecat.pipeline.runner import PipelineRunner
-from pipecat.pipeline.task import PipelineTask
-from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.user_response import UserResponseAggregator
-from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
-from pipecat.runner.types import RunnerArguments
-from pipecat.runner.utils import (
-    create_transport,
-    get_transport_client_id,
-    maybe_capture_participant_camera,
-)
-from pipecat.services.cartesia.tts import CartesiaTTSService
-from pipecat.services.deepgram.stt import DeepgramSTTService
-from pipecat.services.moondream.vision import MoondreamService
-from pipecat.transports.base_transport import BaseTransport, TransportParams
-from pipecat.transports.daily.transport import DailyParams
-
-load_dotenv(override=True)
-
-
-class UserImageRequester(FrameProcessor):
-    """Converts incoming text into requests for user images."""
-
-    def __init__(self, participant_id: Optional[str] = None):
-        super().__init__()
-        self._participant_id = participant_id
-
-    def set_participant_id(self, participant_id: str):
-        self._participant_id = participant_id
-
-    async def process_frame(self, frame: Frame, direction: FrameDirection):
-        await super().process_frame(frame, direction)
-
-        if self._participant_id and isinstance(frame, TextFrame):
-            await self.push_frame(
-                UserImageRequestFrame(self._participant_id, context=frame.text),
-                FrameDirection.UPSTREAM,
-            )
-        else:
-            await self.push_frame(frame, direction)
-
-
-class UserImageProcessor(FrameProcessor):
-    """Converts incoming user images into context frames."""
-
-    async def process_frame(self, frame: Frame, direction: FrameDirection):
-        await super().process_frame(frame, direction)
-
-        if isinstance(frame, UserImageRawFrame):
-            if frame.request and frame.request.context:
-                context = LLMContext()
-                context.add_image_frame_message(
-                    image=frame.image,
-                    text=frame.request.context,
-                    size=frame.size,
-                    format=frame.format,
-                )
-                frame = LLMContextFrame(context)
-                await self.push_frame(frame)
-        else:
-            await self.push_frame(frame, direction)
-
-
-# We store functions so objects (e.g. SileroVADAnalyzer) don't get
-# instantiated. The function will be called when the desired transport gets
-# selected.
-transport_params = {
-    "daily": lambda: DailyParams(
-        audio_in_enabled=True,
-        audio_out_enabled=True,
-        video_in_enabled=True,
-        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
-        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
-    ),
-    "webrtc": lambda: TransportParams(
-        audio_in_enabled=True,
-        audio_out_enabled=True,
-        video_in_enabled=True,
-        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
-        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
-    ),
-}
-
-
-async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
-    logger.info(f"Starting bot")
-
-    user_response = UserResponseAggregator()
-
-    # Initialize the image requester without setting the participant ID yet
-    image_requester = UserImageRequester()
-
-    image_processor = UserImageProcessor()
-
-    # If you run into weird description, try with use_cpu=True
-    moondream = MoondreamService()
-
-    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
-
-    tts = CartesiaTTSService(
-        api_key=os.getenv("CARTESIA_API_KEY"),
-        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
-    )
-
-    pipeline = Pipeline(
-        [
-            transport.input(),
-            stt,
-            user_response,
-            image_requester,
-            image_processor,
-            moondream,
-            tts,
-            transport.output(),
-        ]
-    )
-
-    task = PipelineTask(
-        pipeline,
-        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
-    )
-
-    @transport.event_handler("on_client_connected")
-    async def on_client_connected(transport, client):
-        logger.info(f"Client connected: {client}")
-
-        await maybe_capture_participant_camera(transport, client)
-
-        # Set the participant ID in the image requester
-        client_id = get_transport_client_id(transport, client)
-        image_requester.set_participant_id(client_id)
-
-        # Welcome message
-        await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me about what I see."))
-
-    @transport.event_handler("on_client_disconnected")
-    async def on_client_disconnected(transport, client):
-        logger.info(f"Client disconnected")
-        await task.cancel()
-
-    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
-
-    await runner.run(task)
-
-
-async def bot(runner_args: RunnerArguments):
-    """Main bot entry point compatible with Pipecat Cloud."""
-    transport = await create_transport(runner_args, transport_params)
-    await run_bot(transport, runner_args)
-
-
-if __name__ == "__main__":
-    from pipecat.runner.run import main
-
-    main()
--- a/examples/foundational/12a-describe-image-anthropic.py
+++ b/examples/foundational/12a-describe-image-anthropic.py
@@ -4,36 +4,25 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #

+
 import os
-from typing import Optional

 from dotenv import load_dotenv
 from loguru import logger
+from PIL import Image

 from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
 from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
 from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.audio.vad.vad_analyzer import VADParams
-from pipecat.frames.frames import (
-    Frame,
-    LLMContextFrame,
-    TextFrame,
-    TTSSpeakFrame,
-    UserImageRawFrame,
-    UserImageRequestFrame,
-)
+from pipecat.frames.frames import LLMRunFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.user_response import UserResponseAggregator
-from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.runner.types import RunnerArguments
-from pipecat.runner.utils import (
-    create_transport,
-    get_transport_client_id,
-    maybe_capture_participant_camera,
-)
+from pipecat.runner.utils import create_transport
 from pipecat.services.anthropic.llm import AnthropicLLMService
 from pipecat.services.cartesia.tts import CartesiaTTSService
 from pipecat.services.deepgram.stt import DeepgramSTTService
@@ -43,49 +32,6 @@ from pipecat.transports.daily.transport import DailyParams
 load_dotenv(override=True)


-class UserImageRequester(FrameProcessor):
-    """Converts incoming text into requests for user images."""
-
-    def __init__(self, participant_id: Optional[str] = None):
-        super().__init__()
-        self._participant_id = participant_id
-
-    def set_participant_id(self, participant_id: str):
-        self._participant_id = participant_id
-
-    async def process_frame(self, frame: Frame, direction: FrameDirection):
-        await super().process_frame(frame, direction)
-
-        if self._participant_id and isinstance(frame, TextFrame):
-            await self.push_frame(
-                UserImageRequestFrame(self._participant_id, context=frame.text),
-                FrameDirection.UPSTREAM,
-            )
-        else:
-            await self.push_frame(frame, direction)
-
-
-class UserImageProcessor(FrameProcessor):
-    """Converts incoming user images into context frames."""
-
-    async def process_frame(self, frame: Frame, direction: FrameDirection):
-        await super().process_frame(frame, direction)
-
-        if isinstance(frame, UserImageRawFrame):
-            if frame.request and frame.request.context:
-                context = LLMContext()
-                context.add_image_frame_message(
-                    image=frame.image,
-                    text=frame.request.context,
-                    size=frame.size,
-                    format=frame.format,
-                )
-                frame = LLMContextFrame(context)
-                await self.push_frame(frame)
-        else:
-            await self.push_frame(frame, direction)
-
-
 # We store functions so objects (e.g. SileroVADAnalyzer) don't get
 # instantiated. The function will be called when the desired transport gets
 # selected.
@@ -93,14 +39,12 @@ transport_params = {
    "daily": lambda: DailyParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
-        video_in_enabled=True,
        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
    ),
    "webrtc": lambda: TransportParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
-        video_in_enabled=True,
        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
    ),
@@ -110,33 +54,34 @@ transport_params = {
 async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info(f"Starting bot")

-    user_response = UserResponseAggregator()
-
-    # Initialize the image requester without setting the participant ID yet
-    image_requester = UserImageRequester()
-
-    image_processor = UserImageProcessor()
-
    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

-    # Anthropic for vision analysis
-    anthropic = AnthropicLLMService(api_key=os.getenv("ANTHROPIC_API_KEY"))
-
    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )

+    llm = AnthropicLLMService(api_key=os.getenv("ANTHROPIC_API_KEY"))
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way. You are also able to describe images.",
+        },
+    ]
+
+    context = LLMContext(messages)
+    context_aggregator = LLMContextAggregatorPair(context)
+
    pipeline = Pipeline(
        [
-            transport.input(),
-            stt,
-            user_response,
-            image_requester,
-            image_processor,
-            anthropic,
-            tts,
-            transport.output(),
+            transport.input(),  # Transport user input
+            stt,  # STT
+            context_aggregator.user(),  # User responses
+            llm,  # LLM
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+            context_aggregator.assistant(),  # Assistant spoken responses
        ]
    )

@@ -151,16 +96,28 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
-        logger.info(f"Client connected: {client}")
+        logger.info(f"Client connected")

-        await maybe_capture_participant_camera(transport, client)
+        if not runner_args.body:
+            script_dir = os.path.dirname(__file__)
+            runner_args.body = {
+                "image_path": os.path.join(script_dir, "assets", "cat.jpg"),
+                "question": "Describe this image",
+            }

-        # Set the participant ID in the image requester
-        client_id = get_transport_client_id(transport, client)
-        image_requester.set_participant_id(client_id)
+        image_path = runner_args.body["image_path"]
+        question = runner_args.body["question"]

-        # Welcome message
-        await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me about what I see."))
+        # Kick off the conversation.
+        image = Image.open(image_path)
+        message = LLMContext.create_image_message(
+            image=image.tobytes(),
+            format="RGB",
+            size=image.size,
+            text=question,
+        )
+        messages.append(message)
+        await task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
--- a/examples/foundational/12b-describe-image-aws.py
+++ b/examples/foundational/12b-describe-image-aws.py
@@ -0,0 +1,148 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+from PIL import Image
+
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import LLMRunFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.aws.llm import AWSBedrockLLMService
+from pipecat.services.cartesia.tts import CartesiaTTSService
+from pipecat.services.deepgram.stt import DeepgramSTTService
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+
+load_dotenv(override=True)
+
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+
+    tts = CartesiaTTSService(
+        api_key=os.getenv("CARTESIA_API_KEY"),
+        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
+    )
+
+    llm = AWSBedrockLLMService(
+        aws_region="us-west-2",
+        model="us.anthropic.claude-3-7-sonnet-20250219-v1:0",
+        # Note: usually, prefer providing latency="optimized" param.
+        # Here we can't because AWS Bedrock doesn't support it for Claude 3.7,
+        # which we need for image input.
+        params=AWSBedrockLLMService.InputParams(temperature=0.8),
+    )
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way. You are also able to describe images.",
+        },
+    ]
+
+    context = LLMContext(messages)
+    context_aggregator = LLMContextAggregatorPair(context)
+
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            stt,  # STT
+            context_aggregator.user(),  # User responses
+            llm,  # LLM
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+            context_aggregator.assistant(),  # Assistant spoken responses
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+
+        if not runner_args.body:
+            script_dir = os.path.dirname(__file__)
+            runner_args.body = {
+                "image_path": os.path.join(script_dir, "assets", "cat.jpg"),
+                "question": "Describe this image",
+            }
+
+        image_path = runner_args.body["image_path"]
+        question = runner_args.body["question"]
+
+        # Kick off the conversation.
+        image = Image.open(image_path)
+        message = LLMContext.create_image_message(
+            image=image.tobytes(),
+            format="RGB",
+            size=image.size,
+            text=question,
+        )
+        messages.append(message)
+        await task.queue_frames([LLMRunFrame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/12c-describe-image-gemini-flash.py
+++ b/examples/foundational/12c-describe-image-gemini-flash.py
@@ -0,0 +1,141 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+from PIL import Image
+
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import LLMRunFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.cartesia.tts import CartesiaTTSService
+from pipecat.services.deepgram.stt import DeepgramSTTService
+from pipecat.services.google.llm import GoogleLLMService
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+
+load_dotenv(override=True)
+
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+
+    tts = CartesiaTTSService(
+        api_key=os.getenv("CARTESIA_API_KEY"),
+        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
+    )
+
+    llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"))
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way. You are also able to describe images.",
+        },
+    ]
+
+    context = LLMContext(messages)
+    context_aggregator = LLMContextAggregatorPair(context)
+
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            stt,  # STT
+            context_aggregator.user(),  # User responses
+            llm,  # LLM
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+            context_aggregator.assistant(),  # Assistant spoken responses
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+
+        if not runner_args.body:
+            script_dir = os.path.dirname(__file__)
+            runner_args.body = {
+                "image_path": os.path.join(script_dir, "assets", "cat.jpg"),
+                "question": "Describe this image",
+            }
+
+        image_path = runner_args.body["image_path"]
+        question = runner_args.body["question"]
+
+        # Kick off the conversation.
+        image = Image.open(image_path)
+        message = LLMContext.create_image_message(
+            image=image.tobytes(),
+            format="RGB",
+            size=image.size,
+            text=question,
+        )
+        messages.append(message)
+        await task.queue_frames([LLMRunFrame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/12d-describe-image-moondream.py
+++ b/examples/foundational/12d-describe-image-moondream.py
@@ -0,0 +1,122 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+from PIL import Image
+
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import UserImageRawFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.cartesia.tts import CartesiaTTSService
+from pipecat.services.moondream.vision import MoondreamService
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+
+load_dotenv(override=True)
+
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    tts = CartesiaTTSService(
+        api_key=os.getenv("CARTESIA_API_KEY"),
+        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
+    )
+
+    vision = MoondreamService()
+
+    pipeline = Pipeline(
+        [
+            vision,  # Vision
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+
+        if not runner_args.body:
+            script_dir = os.path.dirname(__file__)
+            runner_args.body = {
+                "image_path": os.path.join(script_dir, "assets", "cat.jpg"),
+                "question": "Describe this image",
+            }
+
+        image_path = runner_args.body["image_path"]
+        question = runner_args.body["question"]
+
+        # Describe the image.
+        image = Image.open(image_path)
+        await task.queue_frames(
+            [
+                UserImageRawFrame(
+                    image=image.tobytes(),
+                    format="RGB",
+                    size=image.size,
+                    text=question,
+                )
+            ]
+        )
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/13f-cartesia-transcription.py
+++ b/examples/foundational/13f-cartesia-transcription.py
@@ -48,10 +48,7 @@ transport_params = {
 async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info(f"Starting bot")

-    stt = CartesiaSTTService(
-        api_key=os.getenv("CARTESIA_API_KEY"),
-        base_url=os.getenv("CARTESIA_BASE_URL"),
-    )
+    stt = CartesiaSTTService(api_key=os.getenv("CARTESIA_API_KEY"))

    tl = TranscriptionLogger()

--- a/examples/foundational/14d-function-calling-anthropic-video.py
+++ b/examples/foundational/14d-function-calling-anthropic-video.py
@@ -4,8 +4,6 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #

-
-import asyncio
 import os

 from dotenv import load_dotenv
@@ -17,12 +15,13 @@ from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
 from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
 from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.audio.vad.vad_analyzer import VADParams
-from pipecat.frames.frames import LLMRunFrame
+from pipecat.frames.frames import LLMRunFrame, UserImageRequestFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.processors.frame_processor import FrameDirection
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import (
    create_transport,
@@ -39,34 +38,30 @@ from pipecat.transports.daily.transport import DailyParams
 load_dotenv(override=True)


-# Global variable to store the client ID
-client_id = ""
+async def fetch_user_image(params: FunctionCallParams):
+    """Fetch the user image and push it to the LLM.

-
-async def get_weather(params: FunctionCallParams):
-    location = params.arguments["location"]
-    await params.result_callback(f"The weather in {location} is currently 72 degrees and sunny.")
-
-
-async def get_image(params: FunctionCallParams):
+    When called, this function pushes a UserImageRequestFrame upstream to the
+    transport. As a result, the transport will request the user image and push a
+    UserImageRawFrame downstream which will be added to the context by the LLM
+    assistant aggregator.
+    """
+    user_id = params.arguments["user_id"]
    question = params.arguments["question"]
-    logger.debug(f"Requesting image with user_id={client_id}, question={question}")
+    logger.debug(f"Requesting image with user_id={user_id}, question={question}")

-    # Request the image frame
-    await params.llm.request_image_frame(
-        user_id=client_id,
-        function_name=params.function_name,
-        tool_call_id=params.tool_call_id,
-        text_content=question,
+    # Request a user image frame and indicate that it should be added to the
+    # context.
+    await params.llm.push_frame(
+        UserImageRequestFrame(user_id=user_id, text=question, append_to_context=True),
+        FrameDirection.UPSTREAM,
    )

-    # Wait a short time for the frame to be processed
-    await asyncio.sleep(0.5)
+    await params.result_callback(None)

-    # Return a result to complete the function call
-    await params.result_callback(
-        f"I've captured an image from your camera and I'm analyzing what you asked about: {question}"
-    )
+    # Instead of None, it's possible to also provide a tool call answer to
+    # tell the LLM that we are grabbing the image to analyze.
+    # await params.result_callback({"result": "Image is being captured."})


 # We store functions so objects (e.g. SileroVADAnalyzer) don't get
@@ -100,70 +95,32 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )

-    llm = AnthropicLLMService(
-        api_key=os.getenv("ANTHROPIC_API_KEY"),
-        model="claude-3-7-sonnet-latest",
-        params=AnthropicLLMService.InputParams(enable_prompt_caching=True),
-    )
-    llm.register_function("get_weather", get_weather)
-    llm.register_function("get_image", get_image)
+    # Anthropic for vision analysis
+    llm = AnthropicLLMService(api_key=os.getenv("ANTHROPIC_API_KEY"))
+    llm.register_function("fetch_user_image", fetch_user_image)

-    weather_function = FunctionSchema(
-        name="get_weather",
-        description="Get the current weather",
+    fetch_image_function = FunctionSchema(
+        name="fetch_user_image",
+        description="Called when the user requests a description of their camera feed",
        properties={
-            "location": {
+            "user_id": {
                "type": "string",
-                "description": "The city and state, e.g. San Francisco, CA",
+                "description": "The ID of the user to grab the image from",
            },
-        },
-        required=["location"],
-    )
-    get_image_function = FunctionSchema(
-        name="get_image",
-        description="Get an image from the video stream.",
-        properties={
            "question": {
                "type": "string",
-                "description": "The question that the user is asking about the image.",
-            }
+                "description": "The question that the user is asking about the image",
+            },
        },
-        required=["question"],
+        required=["user_id", "question"],
    )
-    tools = ToolsSchema(standard_tools=[weather_function, get_image_function])
-
-    system_prompt = """\
-You are a helpful assistant who converses with a user and answers questions. Respond concisely to general questions.
-
-Your response will be turned into speech so use only simple words and punctuation.
-
-You have access to two tools: get_weather and get_image.
-
-You can respond to questions about the weather using the get_weather tool.
-
-You can answer questions about the user's video stream using the get_image tool. Some examples of phrases that \
-indicate you should use the get_image tool are:
- What do you see?
- What's in the video?
- Can you describe the video?
- Tell me about what you see.
- Tell me something interesting about what you see.
- What's happening in the video?
-
-If you need to use a tool, simply use the tool. Do not tell the user the tool you are using. Be brief and concise.
-    """
+    tools = ToolsSchema(standard_tools=[fetch_image_function])

    messages = [
        {
            "role": "system",
-            "content": [
-                {
-                    "type": "text",
-                    "text": system_prompt,
-                }
-            ],
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way. You are able to describe images from the user camera.",
        },
-        {"role": "user", "content": "Start the conversation by introducing yourself."},
    ]

    context = LLMContext(messages, tools)
@@ -173,11 +130,11 @@ If you need to use a tool, simply use the tool. Do not tell the user the tool yo
        [
            transport.input(),  # Transport user input
            stt,  # STT
-            context_aggregator.user(),  # User speech to text
+            context_aggregator.user(),  # User responses
            llm,  # LLM
            tts,  # TTS
            transport.output(),  # Transport bot output
-            context_aggregator.assistant(),  # Assistant spoken responses and tool context
+            context_aggregator.assistant(),  # Assistant spoken responses
        ]
    )

@@ -196,10 +153,16 @@ If you need to use a tool, simply use the tool. Do not tell the user the tool yo

        await maybe_capture_participant_camera(transport, client)

-        global client_id
+        # Set the participant ID in the image requester
        client_id = get_transport_client_id(transport, client)

        # Kick off the conversation.
+        messages.append(
+            {
+                "role": "system",
+                "content": f"Please introduce yourself to the user. Use '{client_id}' as the user ID during function calls.",
+            }
+        )
        await task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
--- a/examples/foundational/14d-function-calling-aws-video.py
+++ b/examples/foundational/14d-function-calling-aws-video.py
@@ -5,29 +5,23 @@
 #

 import os
-from typing import Optional

 from dotenv import load_dotenv
 from loguru import logger

+from pipecat.adapters.schemas.function_schema import FunctionSchema
+from pipecat.adapters.schemas.tools_schema import ToolsSchema
 from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
 from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
 from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.audio.vad.vad_analyzer import VADParams
-from pipecat.frames.frames import (
-    Frame,
-    LLMContextFrame,
-    TextFrame,
-    TTSSpeakFrame,
-    UserImageRawFrame,
-    UserImageRequestFrame,
-)
+from pipecat.frames.frames import LLMRunFrame, UserImageRequestFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.user_response import UserResponseAggregator
-from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.processors.frame_processor import FrameDirection
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import (
    create_transport,
@@ -37,54 +31,37 @@ from pipecat.runner.utils import (
 from pipecat.services.aws.llm import AWSBedrockLLMService
 from pipecat.services.cartesia.tts import CartesiaTTSService
 from pipecat.services.deepgram.stt import DeepgramSTTService
+from pipecat.services.llm_service import FunctionCallParams
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams

 load_dotenv(override=True)


-class UserImageRequester(FrameProcessor):
-    """Converts incoming text into requests for user images."""
+async def fetch_user_image(params: FunctionCallParams):
+    """Fetch the user image and push it to the LLM.

-    def __init__(self, participant_id: Optional[str] = None):
-        super().__init__()
-        self._participant_id = participant_id
+    When called, this function pushes a UserImageRequestFrame upstream to the
+    transport. As a result, the transport will request the user image and push a
+    UserImageRawFrame downstream which will be added to the context by the LLM
+    assistant aggregator.
+    """
+    user_id = params.arguments["user_id"]
+    question = params.arguments["question"]
+    logger.debug(f"Requesting image with user_id={user_id}, question={question}")

-    def set_participant_id(self, participant_id: str):
-        self._participant_id = participant_id
+    # Request a user image frame and indicate that it should be added to the
+    # context.
+    await params.llm.push_frame(
+        UserImageRequestFrame(user_id=user_id, text=question, append_to_context=True),
+        FrameDirection.UPSTREAM,
+    )

-    async def process_frame(self, frame: Frame, direction: FrameDirection):
-        await super().process_frame(frame, direction)
+    await params.result_callback(None)

-        if self._participant_id and isinstance(frame, TextFrame):
-            await self.push_frame(
-                UserImageRequestFrame(self._participant_id, context=frame.text),
-                FrameDirection.UPSTREAM,
-            )
-        else:
-            await self.push_frame(frame, direction)
-
-
-class UserImageProcessor(FrameProcessor):
-    """Converts incoming user images into context frames."""
-
-    async def process_frame(self, frame: Frame, direction: FrameDirection):
-        await super().process_frame(frame, direction)
-
-        if isinstance(frame, UserImageRawFrame):
-            if frame.request and frame.request.context:
-                # Note: AWS Bedrock does not yet support the universal LLMContext
-                context = LLMContext()
-                context.add_image_frame_message(
-                    image=frame.image,
-                    text=frame.request.context,
-                    size=frame.size,
-                    format=frame.format,
-                )
-                frame = LLMContextFrame(context)
-                await self.push_frame(frame)
-        else:
-            await self.push_frame(frame, direction)
+    # Instead of None, it's possible to also provide a tool call answer to
+    # tell the LLM that we are grabbing the image to analyze.
+    # await params.result_callback({"result": "Image is being captured."})


 # We store functions so objects (e.g. SileroVADAnalyzer) don't get
@@ -111,17 +88,15 @@ transport_params = {
 async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info(f"Starting bot")

-    user_response = UserResponseAggregator()
-
-    # Initialize the image requester without setting the participant ID yet
-    image_requester = UserImageRequester()
-
-    image_processor = UserImageProcessor()
-
    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

+    tts = CartesiaTTSService(
+        api_key=os.getenv("CARTESIA_API_KEY"),
+        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
+    )
+
    # AWS for vision analysis
-    aws = AWSBedrockLLMService(
+    llm = AWSBedrockLLMService(
        aws_region="us-west-2",
        model="us.anthropic.claude-3-7-sonnet-20250219-v1:0",
        # Note: usually, prefer providing latency="optimized" param.
@@ -129,22 +104,44 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        # which we need for image input.
        params=AWSBedrockLLMService.InputParams(temperature=0.8),
    )
+    llm.register_function("fetch_user_image", fetch_user_image)

-    tts = CartesiaTTSService(
-        api_key=os.getenv("CARTESIA_API_KEY"),
-        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
+    fetch_image_function = FunctionSchema(
+        name="fetch_user_image",
+        description="Called when the user requests a description of their camera feed",
+        properties={
+            "user_id": {
+                "type": "string",
+                "description": "The ID of the user to grab the image from",
+            },
+            "question": {
+                "type": "string",
+                "description": "The question that the user is asking about the image",
+            },
+        },
+        required=["user_id", "question"],
    )
+    tools = ToolsSchema(standard_tools=[fetch_image_function])
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way. You are able to describe images from the user camera.",
+        },
+    ]
+
+    context = LLMContext(messages, tools)
+    context_aggregator = LLMContextAggregatorPair(context)

    pipeline = Pipeline(
        [
-            transport.input(),
-            stt,
-            user_response,
-            image_requester,
-            image_processor,
-            aws,
-            tts,
-            transport.output(),
+            transport.input(),  # Transport user input
+            stt,  # STT
+            context_aggregator.user(),  # User responses
+            llm,  # LLM
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+            context_aggregator.assistant(),  # Assistant spoken responses
        ]
    )

@@ -165,10 +162,15 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

        # Set the participant ID in the image requester
        client_id = get_transport_client_id(transport, client)
-        image_requester.set_participant_id(client_id)

-        # Welcome message
-        await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me about what I see."))
+        # Kick off the conversation.
+        messages.append(
+            {
+                "role": "system",
+                "content": f"Please introduce yourself to the user. Use '{client_id}' as the user ID during function calls.",
+            }
+        )
+        await task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
--- a/examples/foundational/14d-function-calling-gemini-flash-video.py
+++ b/examples/foundational/14d-function-calling-gemini-flash-video.py
@@ -5,29 +5,23 @@
 #

 import os
-from typing import Optional

 from dotenv import load_dotenv
 from loguru import logger

+from pipecat.adapters.schemas.function_schema import FunctionSchema
+from pipecat.adapters.schemas.tools_schema import ToolsSchema
 from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
 from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
 from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.audio.vad.vad_analyzer import VADParams
-from pipecat.frames.frames import (
-    Frame,
-    LLMContextFrame,
-    TextFrame,
-    TTSSpeakFrame,
-    UserImageRawFrame,
-    UserImageRequestFrame,
-)
+from pipecat.frames.frames import LLMRunFrame, UserImageRequestFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.user_response import UserResponseAggregator
-from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.processors.frame_processor import FrameDirection
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import (
    create_transport,
@@ -37,53 +31,37 @@ from pipecat.runner.utils import (
 from pipecat.services.cartesia.tts import CartesiaTTSService
 from pipecat.services.deepgram.stt import DeepgramSTTService
 from pipecat.services.google.llm import GoogleLLMService
+from pipecat.services.llm_service import FunctionCallParams
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams

 load_dotenv(override=True)


-class UserImageRequester(FrameProcessor):
-    """Converts incoming text into requests for user images."""
+async def fetch_user_image(params: FunctionCallParams):
+    """Fetch the user image and push it to the LLM.

-    def __init__(self, participant_id: Optional[str] = None):
-        super().__init__()
-        self._participant_id = participant_id
+    When called, this function pushes a UserImageRequestFrame upstream to the
+    transport. As a result, the transport will request the user image and push a
+    UserImageRawFrame downstream which will be added to the context by the LLM
+    assistant aggregator.
+    """
+    user_id = params.arguments["user_id"]
+    question = params.arguments["question"]
+    logger.debug(f"Requesting image with user_id={user_id}, question={question}")

-    def set_participant_id(self, participant_id: str):
-        self._participant_id = participant_id
+    # Request a user image frame and indicate that it should be added to the
+    # context.
+    await params.llm.push_frame(
+        UserImageRequestFrame(user_id=user_id, text=question, append_to_context=True),
+        FrameDirection.UPSTREAM,
+    )

-    async def process_frame(self, frame: Frame, direction: FrameDirection):
-        await super().process_frame(frame, direction)
+    await params.result_callback(None)

-        if self._participant_id and isinstance(frame, TextFrame):
-            await self.push_frame(
-                UserImageRequestFrame(self._participant_id, context=frame.text),
-                FrameDirection.UPSTREAM,
-            )
-        else:
-            await self.push_frame(frame, direction)
-
-
-class UserImageProcessor(FrameProcessor):
-    """Converts incoming user images into context frames."""
-
-    async def process_frame(self, frame: Frame, direction: FrameDirection):
-        await super().process_frame(frame, direction)
-
-        if isinstance(frame, UserImageRawFrame):
-            if frame.request and frame.request.context:
-                context = LLMContext()
-                context.add_image_frame_message(
-                    image=frame.image,
-                    text=frame.request.context,
-                    size=frame.size,
-                    format=frame.format,
-                )
-                frame = LLMContextFrame(context)
-                await self.push_frame(frame)
-        else:
-            await self.push_frame(frame, direction)
+    # Instead of None, it's possible to also provide a tool call answer to
+    # tell the LLM that we are grabbing the image to analyze.
+    # await params.result_callback({"result": "Image is being captured."})


 # We store functions so objects (e.g. SileroVADAnalyzer) don't get
@@ -110,33 +88,53 @@ transport_params = {
 async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info(f"Starting bot")

-    user_response = UserResponseAggregator()
-
-    # Initialize the image requester without setting the participant ID yet
-    image_requester = UserImageRequester()
-
-    image_processor = UserImageProcessor()
-
    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

-    # Google Gemini model for vision analysis
-    google = GoogleLLMService(model="gemini-2.0-flash-001", api_key=os.getenv("GOOGLE_API_KEY"))
-
    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )

+    # Google Gemini model for vision analysis
+    llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"))
+    llm.register_function("fetch_user_image", fetch_user_image)
+
+    fetch_image_function = FunctionSchema(
+        name="fetch_user_image",
+        description="Called when the user requests a description of their camera feed",
+        properties={
+            "user_id": {
+                "type": "string",
+                "description": "The ID of the user to grab the image from",
+            },
+            "question": {
+                "type": "string",
+                "description": "The question that the user is asking about the image",
+            },
+        },
+        required=["user_id", "question"],
+    )
+    tools = ToolsSchema(standard_tools=[fetch_image_function])
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way. You are able to describe images from the user camera.",
+        },
+    ]
+
+    context = LLMContext(messages, tools)
+    context_aggregator = LLMContextAggregatorPair(context)
+
    pipeline = Pipeline(
        [
-            transport.input(),
-            stt,
-            user_response,
-            image_requester,
-            image_processor,
-            google,
-            tts,
-            transport.output(),
+            transport.input(),  # Transport user input
+            stt,  # STT
+            context_aggregator.user(),  # User responses
+            llm,  # LLM
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+            context_aggregator.assistant(),  # Assistant spoken responses
        ]
    )

@@ -157,10 +155,15 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

        # Set the participant ID in the image requester
        client_id = get_transport_client_id(transport, client)
-        image_requester.set_participant_id(client_id)

-        # Welcome message
-        await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me about what I see."))
+        # Kick off the conversation.
+        messages.append(
+            {
+                "role": "system",
+                "content": f"Please introduce yourself to the user. Use '{client_id}' as the user ID during function calls.",
+            }
+        )
+        await task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
--- a/examples/foundational/14d-function-calling-moondream-video.py
+++ b/examples/foundational/14d-function-calling-moondream-video.py
@@ -0,0 +1,190 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.adapters.schemas.function_schema import FunctionSchema
+from pipecat.adapters.schemas.tools_schema import ToolsSchema
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import LLMRunFrame, UserImageRequestFrame
+from pipecat.pipeline.parallel_pipeline import ParallelPipeline
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.processors.frame_processor import FrameDirection
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import (
+    create_transport,
+    get_transport_client_id,
+    maybe_capture_participant_camera,
+)
+from pipecat.services.cartesia.tts import CartesiaTTSService
+from pipecat.services.deepgram.stt import DeepgramSTTService
+from pipecat.services.llm_service import FunctionCallParams
+from pipecat.services.moondream.vision import MoondreamService
+from pipecat.services.openai.llm import OpenAILLMService
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+
+load_dotenv(override=True)
+
+
+async def fetch_user_image(params: FunctionCallParams):
+    """Fetch the user image.
+
+    When called, this function pushes a UserImageRequestFrame upstream to the
+    transport. As a result, the transport will request the user image and push a
+    UserImageRawFrame downstream.
+    """
+    user_id = params.arguments["user_id"]
+    question = params.arguments["question"]
+    logger.debug(f"Requesting image with user_id={user_id}, question={question}")
+
+    # Request a user image frame. In this case, we don't want the requested
+    # image to be added to the context because we will process it with
+    # Moondream.
+    await params.llm.push_frame(
+        UserImageRequestFrame(user_id=user_id, text=question, append_to_context=False),
+        FrameDirection.UPSTREAM,
+    )
+
+    await params.result_callback(None)
+
+    # Instead of None, it's possible to also provide a tool call answer to
+    # tell the LLM that we are grabbing the image to analyze.
+    # await params.result_callback({"result": "Image is being captured."})
+
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        video_in_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        video_in_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+
+    tts = CartesiaTTSService(
+        api_key=os.getenv("CARTESIA_API_KEY"),
+        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
+    )
+
+    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
+    llm.register_function("fetch_user_image", fetch_user_image)
+
+    fetch_image_function = FunctionSchema(
+        name="fetch_user_image",
+        description="Called when the user requests a description of their camera feed",
+        properties={
+            "user_id": {
+                "type": "string",
+                "description": "The ID of the user to grab the image from",
+            },
+            "question": {
+                "type": "string",
+                "description": "The question that the user is asking about the image",
+            },
+        },
+        required=["user_id", "question"],
+    )
+    tools = ToolsSchema(standard_tools=[fetch_image_function])
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way. You are able to describe images from the user camera.",
+        },
+    ]
+
+    context = LLMContext(messages, tools)
+    context_aggregator = LLMContextAggregatorPair(context)
+
+    # If you run into weird description, try with use_cpu=True
+    moondream = MoondreamService()
+
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            stt,  # STT
+            context_aggregator.user(),  # User responses
+            ParallelPipeline(
+                [llm],  # LLM
+                [moondream],
+            ),
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+            context_aggregator.assistant(),  # Assistant spoken responses
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected: {client}")
+
+        await maybe_capture_participant_camera(transport, client)
+
+        # Set the participant ID in the image requester
+        client_id = get_transport_client_id(transport, client)
+
+        # Kick off the conversation.
+        messages.append(
+            {
+                "role": "system",
+                "content": f"Please introduce yourself to the user. Use '{client_id}' as the user ID during function calls.",
+            }
+        )
+        await task.queue_frames([LLMRunFrame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/14d-function-calling-openai-video.py
+++ b/examples/foundational/14d-function-calling-openai-video.py
@@ -0,0 +1,186 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.adapters.schemas.function_schema import FunctionSchema
+from pipecat.adapters.schemas.tools_schema import ToolsSchema
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import LLMRunFrame, UserImageRequestFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.processors.frame_processor import FrameDirection
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import (
+    create_transport,
+    get_transport_client_id,
+    maybe_capture_participant_camera,
+)
+from pipecat.services.cartesia.tts import CartesiaTTSService
+from pipecat.services.deepgram.stt import DeepgramSTTService
+from pipecat.services.llm_service import FunctionCallParams
+from pipecat.services.openai.llm import OpenAILLMService
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+
+load_dotenv(override=True)
+
+
+async def fetch_user_image(params: FunctionCallParams):
+    """Fetch the user image and push it to the LLM.
+
+    When called, this function pushes a UserImageRequestFrame upstream to the
+    transport. As a result, the transport will request the user image and push a
+    UserImageRawFrame downstream which will be added to the context by the LLM
+    assistant aggregator.
+    """
+    user_id = params.arguments["user_id"]
+    question = params.arguments["question"]
+    logger.debug(f"Requesting image with user_id={user_id}, question={question}")
+
+    # Request a user image frame and indicate that it should be added to the
+    # context.
+    await params.llm.push_frame(
+        UserImageRequestFrame(user_id=user_id, text=question, append_to_context=True),
+        FrameDirection.UPSTREAM,
+    )
+
+    await params.result_callback(None)
+
+    # Instead of None, it's possible to also provide a tool call answer to
+    # tell the LLM that we are grabbing the image to analyze.
+    # await params.result_callback({"result": "Image is being captured."})
+
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        video_in_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        video_in_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+
+    tts = CartesiaTTSService(
+        api_key=os.getenv("CARTESIA_API_KEY"),
+        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
+    )
+
+    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
+    llm.register_function("fetch_user_image", fetch_user_image)
+
+    fetch_image_function = FunctionSchema(
+        name="fetch_user_image",
+        description="Called when the user requests a description of their camera feed",
+        properties={
+            "user_id": {
+                "type": "string",
+                "description": "The ID of the user to grab the image from",
+            },
+            "question": {
+                "type": "string",
+                "description": "The question that the user is asking about the image",
+            },
+        },
+        required=["user_id", "question"],
+    )
+    tools = ToolsSchema(standard_tools=[fetch_image_function])
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way. You are able to describe images from the user camera.",
+        },
+    ]
+
+    context = LLMContext(messages, tools)
+    context_aggregator = LLMContextAggregatorPair(context)
+
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            stt,  # STT
+            context_aggregator.user(),  # User responses
+            llm,  # LLM
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+            context_aggregator.assistant(),  # Assistant spoken responses
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+
+        await maybe_capture_participant_camera(transport, client)
+
+        client_id = get_transport_client_id(transport, client)
+
+        # Kick off the conversation.
+        messages.append(
+            {
+                "role": "system",
+                "content": f"Please introduce yourself to the user. Use '{client_id}' as the user ID during function calls.",
+            }
+        )
+        await task.queue_frames([LLMRunFrame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/14j-function-calling-nim.py
+++ b/examples/foundational/14j-function-calling-nim.py
@@ -75,7 +75,12 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        # text_filters=[MarkdownTextFilter()],
    )

-    llm = NimLLMService(api_key=os.getenv("NVIDIA_API_KEY"), model="meta/llama-3.3-70b-instruct")
+    llm = NimLLMService(
+        api_key=os.getenv("NVIDIA_API_KEY"),
+        model="nvidia/llama-3.3-nemotron-super-49b-v1.5",
+        # Recommended when turning thinking off
+        params=NimLLMService.InputParams(temperature=0.0),
+    )
    # You can also register a function_name of None to get all functions
    # sent to the same callback with an additional function_name parameter.
    llm.register_function("get_current_weather", fetch_weather_from_api)
@@ -102,6 +107,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    )
    tools = ToolsSchema(standard_tools=[weather_function])
    messages = [
+        # Disable thinking by sending this message first
+        # Check the model for the corresponding "no thinking" message
+        {"role": "system", "content": "/no_think"},
        {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
--- a/examples/foundational/14n-function-calling-perplexity.py
+++ b/examples/foundational/14n-function-calling-perplexity.py
@@ -77,7 +77,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    messages = [
        {
            "role": "user",
-            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but try to be brief.",
        },
    ]

--- a/examples/foundational/14p-function-calling-gemini-vertex-ai.py
+++ b/examples/foundational/14p-function-calling-gemini-vertex-ai.py
@@ -76,9 +76,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    llm = GoogleVertexLLMService(
        credentials=os.getenv("GOOGLE_VERTEX_TEST_CREDENTIALS"),
-        params=GoogleVertexLLMService.InputParams(
-            project_id=os.getenv("GOOGLE_CLOUD_PROJECT_ID"),
-        ),
+        project_id=os.getenv("GOOGLE_CLOUD_PROJECT_ID"),
+        location=os.getenv("GOOGLE_CLOUD_LOCATION"),
    )
    # You can aslo register a function_name of None to get all functions
    # sent to the same callback with an additional function_name parameter.
--- a/examples/foundational/14r-function-calling-aws.py
+++ b/examples/foundational/14r-function-calling-aws.py
@@ -79,8 +79,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    llm = AWSBedrockLLMService(
        aws_region="us-west-2",
-        model="us.anthropic.claude-3-5-haiku-20241022-v1:0",
-        params=AWSBedrockLLMService.InputParams(temperature=0.8, latency="optimized"),
+        model="us.anthropic.claude-haiku-4-5-20251001-v1:0",
+        params=AWSBedrockLLMService.InputParams(temperature=0.8),
    )

    # You can also register a function_name of None to get all functions
--- a/examples/foundational/14x-function-calling-openpipe.py
+++ b/examples/foundational/14x-function-calling-openpipe.py
@@ -4,9 +4,8 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #

-
-import asyncio
 import os
+import time

 from dotenv import load_dotenv
 from loguru import logger
@@ -17,56 +16,31 @@ from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
 from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
 from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.audio.vad.vad_analyzer import VADParams
-from pipecat.frames.frames import LLMRunFrame
+from pipecat.frames.frames import LLMRunFrame, TTSSpeakFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.runner.types import RunnerArguments
-from pipecat.runner.utils import (
-    create_transport,
-    get_transport_client_id,
-    maybe_capture_participant_camera,
-)
+from pipecat.runner.utils import create_transport
 from pipecat.services.cartesia.tts import CartesiaTTSService
 from pipecat.services.deepgram.stt import DeepgramSTTService
 from pipecat.services.llm_service import FunctionCallParams
-from pipecat.services.openai.llm import OpenAILLMService
+from pipecat.services.openpipe.llm import OpenPipeLLMService
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams

 load_dotenv(override=True)


-# Global variable to store the client ID
-client_id = ""
+async def fetch_weather_from_api(params: FunctionCallParams):
+    await params.result_callback({"conditions": "nice", "temperature": "75"})


-async def get_weather(params: FunctionCallParams):
-    location = params.arguments["location"]
-    await params.result_callback(f"The weather in {location} is currently 72 degrees and sunny.")
-
-
-async def get_image(params: FunctionCallParams):
-    question = params.arguments["question"]
-    logger.debug(f"Requesting image with user_id={client_id}, question={question}")
-
-    # Request the image frame
-    await params.llm.request_image_frame(
-        user_id=client_id,
-        function_name=params.function_name,
-        tool_call_id=params.tool_call_id,
-        text_content=question,
-    )
-
-    # Wait a short time for the frame to be processed
-    await asyncio.sleep(0.5)
-
-    # Return a result to complete the function call
-    await params.result_callback(
-        f"I've captured an image from your camera and I'm analyzing what you asked about: {question}"
-    )
+async def fetch_restaurant_recommendation(params: FunctionCallParams):
+    await params.result_callback({"name": "The Golden Dragon"})


 # We store functions so objects (e.g. SileroVADAnalyzer) don't get
@@ -76,14 +50,18 @@ transport_params = {
    "daily": lambda: DailyParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
-        video_in_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
    ),
    "webrtc": lambda: TransportParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
-        video_in_enabled=True,
        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
    ),
@@ -100,12 +78,24 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )

-    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
-    llm.register_function("get_weather", get_weather)
-    llm.register_function("get_image", get_image)
+    timestamp = int(time.time())
+    llm = OpenPipeLLMService(
+        api_key=os.getenv("OPENAI_API_KEY"),
+        openpipe_api_key=os.getenv("OPENPIPE_API_KEY"),
+        tags={"conversation_id": f"pipecat-{timestamp}"},
+    )
+
+    # You can also register a function_name of None to get all functions
+    # sent to the same callback with an additional function_name parameter.
+    llm.register_function("get_current_weather", fetch_weather_from_api)
+    llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation)
+
+    @llm.event_handler("on_function_calls_started")
+    async def on_function_calls_started(service, function_calls):
+        await tts.queue_frame(TTSSpeakFrame("Let me check on that."))

    weather_function = FunctionSchema(
-        name="get_weather",
+        name="get_current_weather",
        description="Get the current weather",
        properties={
            "location": {
@@ -118,41 +108,26 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
                "description": "The temperature unit to use. Infer this from the user's location.",
            },
        },
+        required=["location", "format"],
+    )
+    restaurant_function = FunctionSchema(
+        name="get_restaurant_recommendation",
+        description="Get a restaurant recommendation",
+        properties={
+            "location": {
+                "type": "string",
+                "description": "The city and state, e.g. San Francisco, CA",
+            },
+        },
        required=["location"],
    )
-    get_image_function = FunctionSchema(
-        name="get_image",
-        description="Get an image from the video stream.",
-        properties={
-            "question": {
-                "type": "string",
-                "description": "The question that the user is asking about the image.",
-            }
-        },
-        required=["question"],
-    )
-    tools = ToolsSchema(standard_tools=[weather_function, get_image_function])
+    tools = ToolsSchema(standard_tools=[weather_function, restaurant_function])

-    system_prompt = """\
-You are a helpful assistant who converses with a user and answers questions. Respond concisely to general questions.
-
-Your response will be turned into speech so use only simple words and punctuation.
-
-You have access to two tools: get_weather and get_image.
-
-You can respond to questions about the weather using the get_weather tool.
-
-You can answer questions about the user's video stream using the get_image tool. Some examples of phrases that \
-indicate you should use the get_image tool are:
- What do you see?
- What's in the video?
- Can you describe the video?
- Tell me about what you see.
- Tell me something interesting about what you see.
- What's happening in the video?
-"""
    messages = [
-        {"role": "system", "content": system_prompt},
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
+        },
    ]

    context = LLMContext(messages, tools)
@@ -182,12 +157,6 @@ indicate you should use the get_image tool are:
    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info(f"Client connected")
-
-        await maybe_capture_participant_camera(transport, client)
-
-        global client_id
-        client_id = get_transport_client_id(transport, client)
-
        # Kick off the conversation.
        await task.queue_frames([LLMRunFrame()])

--- a/examples/foundational/16-gpu-container-local-bot.py
+++ b/examples/foundational/16-gpu-container-local-bot.py
@@ -26,7 +26,11 @@ from pipecat.services.deepgram.stt import DeepgramSTTService
 from pipecat.services.deepgram.tts import DeepgramTTSService
 from pipecat.services.openai.llm import OpenAILLMService
 from pipecat.transports.base_transport import BaseTransport, TransportParams
-from pipecat.transports.daily.transport import DailyParams, DailyTransportMessageFrame
+from pipecat.transports.daily.transport import (
+    DailyOutputTransportMessageFrame,
+    DailyOutputTransportMessageUrgentFrame,
+    DailyParams,
+)
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams

 load_dotenv(override=True)
@@ -128,14 +132,14 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
                logger.debug(f"Received latency ping app message: {message}")
                ts = message["latency-ping"]["ts"]
                # Send immediately
-                transport.output().send_message(
-                    DailyTransportMessageFrame(
+                await task.queue_frame(
+                    DailyOutputTransportMessageUrgentFrame(
                        message={"latency-pong-msg-handler": {"ts": ts}}, participant_id=sender
                    )
                )
                # And push to the pipeline for the Daily transport.output to send
                await task.queue_frame(
-                    DailyTransportMessageFrame(
+                    DailyOutputTransportMessageFrame(
                        message={"latency-pong-pipeline-delivery": {"ts": ts}},
                        participant_id=sender,
                    )
--- a/examples/foundational/19-openai-realtime.py
+++ b/examples/foundational/19-openai-realtime.py
@@ -5,6 +5,7 @@
 #


+import asyncio
 import os
 from datetime import datetime

@@ -13,25 +14,34 @@ from loguru import logger

 from pipecat.adapters.schemas.function_schema import FunctionSchema
 from pipecat.adapters.schemas.tools_schema import ToolsSchema
+from pipecat.adapters.services.open_ai_realtime_adapter import OpenAIRealtimeLLMAdapter
 from pipecat.audio.vad.silero import SileroVADAnalyzer
-from pipecat.frames.frames import LLMRunFrame, TranscriptionMessage
+from pipecat.frames.frames import (
+    LLMRunFrame,
+    LLMSetToolsFrame,
+    LLMUpdateSettingsFrame,
+    TranscriptionMessage,
+)
 from pipecat.observers.loggers.transcription_log_observer import TranscriptionLogObserver
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.processors.transcript_processor import TranscriptProcessor
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
 from pipecat.services.llm_service import FunctionCallParams
-from pipecat.services.openai_realtime import (
+from pipecat.services.openai.realtime.events import (
+    AudioConfiguration,
+    AudioInput,
    InputAudioNoiseReduction,
    InputAudioTranscription,
-    OpenAIRealtimeLLMService,
    SemanticTurnDetection,
    SessionProperties,
 )
-from pipecat.services.openai_realtime.events import AudioConfiguration, AudioInput
+from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
@@ -51,6 +61,18 @@ async def fetch_weather_from_api(params: FunctionCallParams):
    )


+async def get_news(params: FunctionCallParams):
+    await params.result_callback(
+        {
+            "news": [
+                "Massive UFO currently hovering above New York City",
+                "Stock markets reach all-time highs",
+                "Living dinosaur species discovered in the Amazon rainforest",
+            ],
+        }
+    )
+
+
 async def fetch_restaurant_recommendation(params: FunctionCallParams):
    await params.result_callback({"name": "The Golden Dragon"})

@@ -72,6 +94,13 @@ weather_function = FunctionSchema(
    required=["location", "format"],
 )

+get_news_function = FunctionSchema(
+    name="get_news",
+    description="Get the current news.",
+    properties={},
+    required=[],
+)
+
 restaurant_function = FunctionSchema(
    name="get_restaurant_recommendation",
    description="Get a restaurant recommendation",
@@ -125,6 +154,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
                noise_reduction=InputAudioNoiseReduction(type="near_field"),
            )
        ),
+        # In this example we provide tools through the context, but you could
+        # alternatively provide them here.
        # tools=tools,
        instructions="""You are a helpful and friendly AI.

@@ -139,10 +170,6 @@ even if you're asked about them.
 You are participating in a voice conversation. Keep your responses concise, short, and to the point
 unless specifically asked to elaborate on a topic.

-You have access to the following tools:
- get_current_weather: Get the current weather for a given location.
- get_restaurant_recommendation: Get a restaurant recommendation for a given location.
-
 Remember, your responses should be short. Just one or two sentences, usually. Respond in English.""",
    )

@@ -156,25 +183,26 @@ Remember, your responses should be short. Just one or two sentences, usually. Re
    # llm.register_function(None, fetch_weather_from_api)
    llm.register_function("get_current_weather", fetch_weather_from_api)
    llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation)
+    llm.register_function("get_news", get_news)

    transcript = TranscriptProcessor()

    # Create a standard OpenAI LLM context object using the normal messages format. The
    # OpenAIRealtimeLLMService will convert this internally to messages that the
    # openai WebSocket API can understand.
-    context = OpenAILLMContext(
+    context = LLMContext(
        [{"role": "user", "content": "Say hello!"}],
        tools,
    )

-    context_aggregator = llm.create_context_aggregator(context)
+    context_aggregator = LLMContextAggregatorPair(context)

    pipeline = Pipeline(
        [
            transport.input(),  # Transport user input
            context_aggregator.user(),
+            transcript.user(),  # LLM pushes TranscriptionFrames upstream
            llm,  # LLM
-            transcript.user(),  # Placed after the LLM, as LLM pushes TranscriptionFrames downstream
            transport.output(),  # Transport bot output
            transcript.assistant(),  # After the transcript output, to time with the audio output
            context_aggregator.assistant(),
@@ -197,6 +225,22 @@ Remember, your responses should be short. Just one or two sentences, usually. Re
        # Kick off the conversation.
        await task.queue_frames([LLMRunFrame()])

+        # Add a new tool at runtime after a delay.
+        await asyncio.sleep(15)
+        new_tools = ToolsSchema(
+            standard_tools=[weather_function, restaurant_function, get_news_function]
+        )
+        await task.queue_frames([LLMSetToolsFrame(tools=new_tools)])
+        # Alternative pattern, useful if you're changing other session properties, too.
+        # (Though note that tools in your LLMContext take precedence over those
+        # in session properties, so if you have context-provided tools, prefer
+        # LLMSetToolsFrame instead, as it updates your context. Ditto for
+        # updating system instructions: send an LLMMessagesUpdateFrame with
+        # context messages updated with your new desired system message.)
+        # await task.queue_frames(
+        #     [LLMUpdateSettingsFrame(settings=SessionProperties(tools=new_tools).model_dump())]
+        # )
+
    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info(f"Client disconnected")
--- a/examples/foundational/19a-azure-realtime.py
+++ b/examples/foundational/19a-azure-realtime.py
@@ -18,16 +18,19 @@ from pipecat.frames.frames import LLMRunFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
+from pipecat.services.azure.realtime.llm import AzureRealtimeLLMService
 from pipecat.services.llm_service import FunctionCallParams
-from pipecat.services.openai_realtime import (
-    AzureRealtimeLLMService,
+from pipecat.services.openai.realtime.events import (
+    AudioConfiguration,
+    AudioInput,
    InputAudioTranscription,
    SessionProperties,
 )
-from pipecat.services.openai_realtime.events import AudioConfiguration, AudioInput
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
@@ -154,10 +157,10 @@ Remember, your responses should be short. Just one or two sentences, usually. Re
    llm.register_function("get_current_weather", fetch_weather_from_api)
    llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation)

-    # Create a standard OpenAI LLM context object using the normal messages format. The
+    # Create a standard LLM context object using the normal messages format. The
    # OpenAIRealtimeBetaLLMService will convert this internally to messages that the
    # openai WebSocket API can understand.
-    context = OpenAILLMContext(
+    context = LLMContext(
        [{"role": "user", "content": "Say hello!"}],
        # [{"role": "user", "content": [{"type": "text", "text": "Say hello!"}]}],
        #     [
@@ -172,7 +175,7 @@ Remember, your responses should be short. Just one or two sentences, usually. Re
        tools,
    )

-    context_aggregator = llm.create_context_aggregator(context)
+    context_aggregator = LLMContextAggregatorPair(context)

    pipeline = Pipeline(
        [
--- a/examples/foundational/19b-openai-realtime-text.py
+++ b/examples/foundational/19b-openai-realtime-text.py
@@ -18,20 +18,22 @@ from pipecat.frames.frames import LLMRunFrame, TranscriptionMessage
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.processors.transcript_processor import TranscriptProcessor
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.cartesia import CartesiaTTSService
+from pipecat.services.cartesia.tts import CartesiaTTSService
 from pipecat.services.llm_service import FunctionCallParams
-from pipecat.services.openai_realtime import (
+from pipecat.services.openai.realtime.events import (
+    AudioConfiguration,
+    AudioInput,
    InputAudioNoiseReduction,
    InputAudioTranscription,
-    OpenAIRealtimeLLMService,
    SemanticTurnDetection,
    SessionProperties,
 )
-from pipecat.services.openai_realtime.events import AudioConfiguration, AudioInput
+from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
@@ -168,20 +170,20 @@ Remember, your responses should be short. Just one or two sentences, usually. Re
    # Create a standard OpenAI LLM context object using the normal messages format. The
    # OpenAIRealtimeLLMService will convert this internally to messages that the
    # openai WebSocket API can understand.
-    context = OpenAILLMContext(
+    context = LLMContext(
        [{"role": "user", "content": "Say hello!"}],
        tools,
    )

-    context_aggregator = llm.create_context_aggregator(context)
+    context_aggregator = LLMContextAggregatorPair(context)

    pipeline = Pipeline(
        [
            transport.input(),  # Transport user input
            context_aggregator.user(),
+            transcript.user(),  # LLM pushes TranscriptionFrames upstream
            llm,  # LLM
            tts,  # TTS
-            transcript.user(),  # Placed after the LLM, as LLM pushes TranscriptionFrames downstream
            transport.output(),  # Transport bot output
            transcript.assistant(),  # After the transcript output, to time with the audio output
            context_aggregator.assistant(),
--- a/examples/foundational/20b-persistent-context-openai-realtime.py
+++ b/examples/foundational/20b-persistent-context-openai-realtime.py
@@ -13,25 +13,27 @@ from datetime import datetime
 from dotenv import load_dotenv
 from loguru import logger

+from pipecat.adapters.schemas.function_schema import FunctionSchema
+from pipecat.adapters.schemas.tools_schema import ToolsSchema
 from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.frames.frames import LLMRunFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
-from pipecat.processors.aggregators.openai_llm_context import (
-    OpenAILLMContext,
-)
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
 from pipecat.services.deepgram.stt import DeepgramSTTService
 from pipecat.services.llm_service import FunctionCallParams
-from pipecat.services.openai_realtime import (
+from pipecat.services.openai.realtime.events import (
+    AudioConfiguration,
+    AudioInput,
    InputAudioTranscription,
-    OpenAIRealtimeLLMService,
    SessionProperties,
    TurnDetection,
 )
-from pipecat.services.openai_realtime.events import AudioConfiguration, AudioInput
+from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
@@ -68,11 +70,11 @@ async def save_conversation(params: FunctionCallParams):
    timestamp = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
    filename = f"{BASE_FILENAME}{timestamp}.json"
    logger.debug(
-        f"writing conversation to {filename}\n{json.dumps(params.context.messages, indent=4)}"
+        f"writing conversation to {filename}\n{json.dumps(params.context.get_messages(), indent=4)}"
    )
    try:
        with open(filename, "w") as file:
-            messages = params.context.get_messages_for_persistent_storage()
+            messages = params.context.get_messages()
            # remove the last message, which is the instruction we just gave to save the conversation
            messages.pop()
            json.dump(messages, file, indent=2)
@@ -89,6 +91,10 @@ async def load_conversation(params: FunctionCallParams):
            with open(filename, "r") as file:
                params.context.set_messages(json.load(file))
                await params.llm.reset_conversation()
+                # NOTE: we manually create a response here rather than relying
+                # on the function callback to trigger one since we've reset the
+                # conversation so the remote service doesn't know about the
+                # in-progress tool call.
                await params.llm._create_response()
        except Exception as e:
            await params.result_callback({"success": False, "error": str(e)})
@@ -96,14 +102,12 @@ async def load_conversation(params: FunctionCallParams):
    asyncio.create_task(_reset())


-tools = [
-    {
-        "type": "function",
-        "name": "get_current_weather",
-        "description": "Get the current weather",
-        "parameters": {
-            "type": "object",
-            "properties": {
+tools = ToolsSchema(
+    standard_tools=[
+        FunctionSchema(
+            name="get_current_weather",
+            description="Get the current weather",
+            properties={
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA",
@@ -114,45 +118,33 @@ tools = [
                    "description": "The temperature unit to use. Infer this from the users location.",
                },
            },
-            "required": ["location", "format"],
-        },
-    },
-    {
-        "type": "function",
-        "name": "save_conversation",
-        "description": "Save the current conversatione. Use this function to persist the current conversation to external storage.",
-        "parameters": {
-            "type": "object",
-            "properties": {},
-            "required": [],
-        },
-    },
-    {
-        "type": "function",
-        "name": "get_saved_conversation_filenames",
-        "description": "Get a list of saved conversation histories. Returns a list of filenames. Each filename includes a date and timestamp. Each file is conversation history that can be loaded into this session.",
-        "parameters": {
-            "type": "object",
-            "properties": {},
-            "required": [],
-        },
-    },
-    {
-        "type": "function",
-        "name": "load_conversation",
-        "description": "Load a conversation history. Use this function to load a conversation history into the current session.",
-        "parameters": {
-            "type": "object",
-            "properties": {
+            required=["location", "format"],
+        ),
+        FunctionSchema(
+            name="save_conversation",
+            description="Save the current conversatione. Use this function to persist the current conversation to external storage.",
+            properties={},
+            required=[],
+        ),
+        FunctionSchema(
+            name="get_saved_conversation_filenames",
+            description="Get a list of saved conversation histories. Returns a list of filenames. Each filename includes a date and timestamp. Each file is conversation history that can be loaded into this session.",
+            properties={},
+            required=[],
+        ),
+        FunctionSchema(
+            name="load_conversation",
+            description="Load a conversation history. Use this function to load a conversation history into the current session.",
+            properties={
                "filename": {
                    "type": "string",
                    "description": "The filename of the conversation history to load.",
                }
            },
-            "required": ["filename"],
-        },
-    },
-]
+            required=["filename"],
+        ),
+    ]
+)


 # We store functions so objects (e.g. SileroVADAnalyzer) don't get
@@ -223,8 +215,8 @@ Remember, your responses should be short. Just one or two sentences, usually."""
    llm.register_function("get_saved_conversation_filenames", get_saved_conversation_filenames)
    llm.register_function("load_conversation", load_conversation)

-    context = OpenAILLMContext([], tools)
-    context_aggregator = llm.create_context_aggregator(context)
+    context = LLMContext([{"role": "user", "content": "Say hello!"}], tools)
+    context_aggregator = LLMContextAggregatorPair(context)

    pipeline = Pipeline(
        [
--- a/examples/foundational/20c-persistent-context-anthropic.py
+++ b/examples/foundational/20c-persistent-context-anthropic.py
@@ -72,7 +72,6 @@ async def save_conversation(params: FunctionCallParams):
    )
    try:
        with open(filename, "w") as file:
-            # todo: extract 'system' into the first message in the list
            messages = params.context.get_messages()
            # remove the last message, which is the instruction we just gave to save the conversation
            messages.pop()
--- a/examples/foundational/20d-persistent-context-gemini.py
+++ b/examples/foundational/20d-persistent-context-gemini.py
@@ -90,7 +90,6 @@ async def save_conversation(params: FunctionCallParams):
    )
    try:
        with open(filename, "w") as file:
-            # todo: extract 'system' into the first message in the list
            messages = params.context.get_messages()
            # remove the last message (the instruction to save the context)
            messages.pop()
--- a/examples/foundational/20e-persistent-context-aws-nova-sonic.py
+++ b/examples/foundational/20e-persistent-context-aws-nova-sonic.py
@@ -20,10 +20,12 @@ from pipecat.frames.frames import LLMRunFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.aws_nova_sonic.aws import AWSNovaSonicLLMService
+from pipecat.services.aws.nova_sonic.llm import AWSNovaSonicLLMService
 from pipecat.services.llm_service import FunctionCallParams
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
@@ -75,7 +77,7 @@ async def save_conversation(params: FunctionCallParams):
    filename = f"{BASE_FILENAME}{timestamp}.json"
    try:
        with open(filename, "w") as file:
-            messages = params.context.get_messages_for_persistent_storage()
+            messages = params.context.get_messages()
            # remove the last few messages. in reverse order, they are:
            # - the in progress save tool call
            # - the invocation of the save tool call
@@ -223,13 +225,13 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    llm.register_function("get_saved_conversation_filenames", get_saved_conversation_filenames)
    llm.register_function("load_conversation", load_conversation)

-    context = OpenAILLMContext(
+    context = LLMContext(
        messages=[
            {"role": "system", "content": f"{system_instruction}"},
        ],
        tools=tools,
    )
-    context_aggregator = llm.create_context_aggregator(context)
+    context_aggregator = LLMContextAggregatorPair(context)

    pipeline = Pipeline(
        [
--- a/examples/foundational/26-gemini-multimodal-live.py
+++ b/examples/foundational/26-gemini-multimodal-live.py
@@ -17,7 +17,7 @@ from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService
+from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
@@ -65,7 +65,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    Respond to what the user said in a creative and helpful way.
    """

-    llm = GeminiMultimodalLiveLLMService(
+    llm = GeminiLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        system_instruction=system_instruction,
        voice_id="Puck",  # Aoede, Charon, Fenrir, Kore, Puck
--- a/examples/foundational/26a-gemini-multimodal-live-transcription.py
+++ b/examples/foundational/26a-gemini-multimodal-live-transcription.py
@@ -16,11 +16,13 @@ from pipecat.frames.frames import LLMRunFrame, TranscriptionMessage
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.processors.transcript_processor import TranscriptProcessor
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService
+from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
@@ -65,14 +67,14 @@ transport_params = {
 async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info(f"Starting bot")

-    llm = GeminiMultimodalLiveLLMService(
+    llm = GeminiLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        voice_id="Aoede",  # Puck, Charon, Kore, Fenrir, Aoede
        # system_instruction="Talk like a pirate."
        # inference_on_context_initialization=False,
    )

-    context = OpenAILLMContext(
+    context = LLMContext(
        [
            {
                "role": "user",
@@ -90,7 +92,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            #     },
        ],
    )
-    context_aggregator = llm.create_context_aggregator(context)
+    context_aggregator = LLMContextAggregatorPair(context)

    transcript = TranscriptProcessor()

--- a/examples/foundational/26b-gemini-multimodal-live-function-calling.py
+++ b/examples/foundational/26b-gemini-multimodal-live-function-calling.py
@@ -19,10 +19,12 @@ from pipecat.frames.frames import LLMRunFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService
+from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
 from pipecat.services.llm_service import FunctionCallParams
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
@@ -122,12 +124,15 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        required=["location"],
    )
    search_tool = {"google_search": {}}
+    # KNOWN ISSUE: If using GeminiVertexLiveLLMService, it appears
+    # you cannot use the "google_search" tool alongside other tools.
+    # See https://github.com/googleapis/python-genai/issues/941.
    tools = ToolsSchema(
        standard_tools=[weather_function, restaurant_function],
        custom_tools={AdapterType.GEMINI: [search_tool]},
    )

-    llm = GeminiMultimodalLiveLLMService(
+    llm = GeminiLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        system_instruction=system_instruction,
        tools=tools,
@@ -136,10 +141,18 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    llm.register_function("get_current_weather", fetch_weather_from_api)
    llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation)

-    context = OpenAILLMContext(
-        [{"role": "user", "content": "Say hello."}],
+    # You can provide the system instructions and tools in the context rather
+    # than as arguments to GeminiLiveLLMService, but note that doing so will
+    # trigger a (fast) reconnection when the GeminiLiveLLMService first
+    # receives the context (i.e. when we send the LLMRunFrame below).
+    context = LLMContext(
+        [
+            # {"role": "system", "content": system_instruction},
+            {"role": "user", "content": "Say hello."},
+        ],
+        # tools,
    )
-    context_aggregator = llm.create_context_aggregator(context)
+    context_aggregator = LLMContextAggregatorPair(context)

    pipeline = Pipeline(
        [
--- a/examples/foundational/26c-gemini-multimodal-live-video.py
+++ b/examples/foundational/26c-gemini-multimodal-live-video.py
@@ -17,14 +17,16 @@ from pipecat.frames.frames import LLMRunFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import (
    create_transport,
    maybe_capture_participant_camera,
    maybe_capture_participant_screen,
 )
-from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService
+from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams

@@ -58,14 +60,14 @@ transport_params = {


 async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
-    llm = GeminiMultimodalLiveLLMService(
+    llm = GeminiLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        voice_id="Aoede",  # Puck, Charon, Kore, Fenrir, Aoede
        # system_instruction="Talk like a pirate."
        # inference_on_context_initialization=False,
    )

-    context = OpenAILLMContext(
+    context = LLMContext(
        [
            {
                "role": "user",
@@ -73,7 +75,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            },
        ],
    )
-    context_aggregator = llm.create_context_aggregator(context)
+    context_aggregator = LLMContextAggregatorPair(context)

    pipeline = Pipeline(
        [
--- a/examples/foundational/26d-gemini-multimodal-live-text.py
+++ b/examples/foundational/26d-gemini-multimodal-live-text.py
@@ -16,13 +16,14 @@ from pipecat.frames.frames import LLMRunFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
 from pipecat.services.cartesia.tts import CartesiaTTSService
-from pipecat.services.gemini_multimodal_live.gemini import (
-    GeminiMultimodalLiveLLMService,
-    GeminiMultimodalModalities,
+from pipecat.services.google.gemini_live.llm import (
+    GeminiLiveLLMService,
+    GeminiModalities,
    InputParams,
 )
 from pipecat.transports.base_transport import BaseTransport, TransportParams
@@ -80,11 +81,15 @@ transport_params = {
 async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info(f"Starting bot")

-    llm = GeminiMultimodalLiveLLMService(
+    # KNOWN ISSUE: If using GeminiLiveVertexLLMService, you cannot specify a
+    # modality other than AUDIO (at least not if using the service's default
+    # model, which is a native audio model:
+    # https://cloud.google.com/vertex-ai/generative-ai/docs/live-api/tools#native-audio).
+    llm = GeminiLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        system_instruction=SYSTEM_INSTRUCTION,
        tools=[{"google_search": {}}, {"code_execution": {}}],
-        params=InputParams(modalities=GeminiMultimodalModalities.TEXT),
+        params=InputParams(modalities=GeminiModalities.TEXT),
    )

    # Optionally, you can set the response modalities via a function
@@ -105,8 +110,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    # Set up conversation context and management
    # The context_aggregator will automatically collect conversation context
-    context = OpenAILLMContext(messages)
-    context_aggregator = llm.create_context_aggregator(context)
+    context = LLMContext(messages)
+    context_aggregator = LLMContextAggregatorPair(context)

    pipeline = Pipeline(
        [
--- a/examples/foundational/26e-gemini-multimodal-google-search.py
+++ b/examples/foundational/26e-gemini-multimodal-google-search.py
@@ -16,10 +16,12 @@ from pipecat.frames.frames import LLMRunFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService
+from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
@@ -83,14 +85,14 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info(f"Starting bot")

    # Initialize the Gemini Multimodal Live model
-    llm = GeminiMultimodalLiveLLMService(
+    llm = GeminiLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        voice_id="Puck",  # Aoede, Charon, Fenrir, Kore, Puck
        system_instruction=system_instruction,
        tools=tools,
    )

-    context = OpenAILLMContext(
+    context = LLMContext(
        [
            {
                "role": "user",
@@ -98,7 +100,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            }
        ],
    )
-    context_aggregator = llm.create_context_aggregator(context)
+    context_aggregator = LLMContextAggregatorPair(context)

    pipeline = Pipeline(
        [
--- a/examples/foundational/26f-gemini-multimodal-live-files-api.py
+++ b/examples/foundational/26f-gemini-multimodal-live-files-api.py
@@ -16,12 +16,12 @@ from pipecat.frames.frames import LLMRunFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.gemini_multimodal_live.gemini import (
-    GeminiMultimodalLiveLLMService,
-)
+from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
@@ -110,7 +110,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """

    # Initialize Gemini service with File API support
-    llm = GeminiMultimodalLiveLLMService(
+    llm = GeminiLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        system_instruction=system_instruction,
        voice_id="Charon",  # Aoede, Charon, Fenrir, Kore, Puck
@@ -131,7 +131,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        mime_type = "text/plain"

        # Create context with file reference
-        context = OpenAILLMContext(
+        context = LLMContext(
            [
                {
                    "role": "user",
@@ -154,7 +154,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    except Exception as e:
        logger.error(f"Error uploading file: {e}")
        # Continue with a basic context if file upload fails
-        context = OpenAILLMContext(
+        context = LLMContext(
            [
                {
                    "role": "user",
@@ -164,7 +164,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        )

    # Create context aggregator
-    context_aggregator = llm.create_context_aggregator(context)
+    context_aggregator = LLMContextAggregatorPair(context)

    # Build the pipeline
    pipeline = Pipeline(
--- a/examples/foundational/26g-gemini-multimodal-live-groundingMetadata.py
+++ b/examples/foundational/26g-gemini-multimodal-live-groundingMetadata.py
@@ -9,13 +9,15 @@ from pipecat.audio.vad.vad_analyzer import VADParams
 from pipecat.frames.frames import Frame, LLMRunFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
-from pipecat.pipeline.task import PipelineParams, PipelineTask
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.pipeline.task import PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService
 from pipecat.services.google.frames import LLMSearchResponseFrame
+from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
@@ -105,7 +107,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        custom_tools={AdapterType.GEMINI: [{"google_search": {}}, {"code_execution": {}}]},
    )

-    llm = GeminiMultimodalLiveLLMService(
+    llm = GeminiLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        system_instruction=SYSTEM_INSTRUCTION,
        voice_id="Charon",  # Aoede, Charon, Fenrir, Kore, Puck
@@ -124,8 +126,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    ]

    # Set up conversation context and management
-    context = OpenAILLMContext(messages)
-    context_aggregator = llm.create_context_aggregator(context)
+    context = LLMContext(messages)
+    context_aggregator = LLMContextAggregatorPair(context)

    pipeline = Pipeline(
        [
--- a/examples/foundational/26h-gemini-live-vertex-function-calling.py
+++ b/examples/foundational/26h-gemini-live-vertex-function-calling.py
@@ -0,0 +1,189 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+
+import os
+from datetime import datetime
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.adapters.schemas.function_schema import FunctionSchema
+from pipecat.adapters.schemas.tools_schema import ToolsSchema
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import LLMRunFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.google.gemini_live.llm_vertex import GeminiLiveVertexLLMService
+from pipecat.services.llm_service import FunctionCallParams
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+
+async def fetch_weather_from_api(params: FunctionCallParams):
+    temperature = 75 if params.arguments["format"] == "fahrenheit" else 24
+    await params.result_callback(
+        {
+            "conditions": "nice",
+            "temperature": temperature,
+            "format": params.arguments["format"],
+            "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"),
+        }
+    )
+
+
+async def fetch_restaurant_recommendation(params: FunctionCallParams):
+    await params.result_callback({"name": "The Golden Dragon"})
+
+
+system_instruction = """
+You are a helpful assistant who can answer questions and use tools.
+
+You have three tools available to you:
+1. get_current_weather: Use this tool to get the current weather in a specific location.
+2. get_restaurant_recommendation: Use this tool to get a restaurant recommendation in a specific location.
+"""
+
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        # set stop_secs to something roughly similar to the internal setting
+        # of the Multimodal Live api, just to align events. This doesn't really
+        # matter because we can only use the Multimodal Live API's phrase
+        # endpointing, for now.
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        # set stop_secs to something roughly similar to the internal setting
+        # of the Multimodal Live api, just to align events. This doesn't really
+        # matter because we can only use the Multimodal Live API's phrase
+        # endpointing, for now.
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        # set stop_secs to something roughly similar to the internal setting
+        # of the Multimodal Live api, just to align events. This doesn't really
+        # matter because we can only use the Multimodal Live API's phrase
+        # endpointing, for now.
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    weather_function = FunctionSchema(
+        name="get_current_weather",
+        description="Get the current weather",
+        properties={
+            "location": {
+                "type": "string",
+                "description": "The city and state, e.g. San Francisco, CA",
+            },
+            "format": {
+                "type": "string",
+                "enum": ["celsius", "fahrenheit"],
+                "description": "The temperature unit to use. Infer this from the user's location.",
+            },
+        },
+        required=["location", "format"],
+    )
+    restaurant_function = FunctionSchema(
+        name="get_restaurant_recommendation",
+        description="Get a restaurant recommendation",
+        properties={
+            "location": {
+                "type": "string",
+                "description": "The city and state, e.g. San Francisco, CA",
+            },
+        },
+        required=["location"],
+    )
+    # KNOWN ISSUE: If using GeminiVertexLiveLLMService, it appears
+    # you cannot use the "google_search" tool alongside other tools.
+    # See https://github.com/googleapis/python-genai/issues/941.
+    tools = ToolsSchema(standard_tools=[weather_function, restaurant_function])
+
+    llm = GeminiLiveVertexLLMService(
+        credentials=os.getenv("GOOGLE_VERTEX_TEST_CREDENTIALS"),
+        project_id=os.getenv("GOOGLE_CLOUD_PROJECT_ID"),
+        location=os.getenv("GOOGLE_CLOUD_LOCATION"),
+        system_instruction=system_instruction,
+        voice_id="Puck",  # Aoede, Charon, Fenrir, Kore, Puck
+        tools=tools,
+    )
+
+    llm.register_function("get_current_weather", fetch_weather_from_api)
+    llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation)
+
+    context = LLMContext([{"role": "user", "content": "Say hello."}])
+    context_aggregator = LLMContextAggregatorPair(context)
+
+    pipeline = Pipeline(
+        [
+            transport.input(),
+            context_aggregator.user(),
+            llm,
+            transport.output(),
+            context_aggregator.assistant(),
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+        # Kick off the conversation.
+        await task.queue_frames([LLMRunFrame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/26i-gemini-live-graceful-end.py
+++ b/examples/foundational/26i-gemini-live-graceful-end.py
@@ -0,0 +1,206 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import os
+from datetime import datetime
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.adapters.schemas.function_schema import FunctionSchema
+from pipecat.adapters.schemas.tools_schema import AdapterType, ToolsSchema
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import EndTaskFrame, LLMRunFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.processors.frame_processor import FrameDirection
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
+from pipecat.services.llm_service import FunctionCallParams
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+
+async def fetch_weather_from_api(params: FunctionCallParams):
+    temperature = 75 if params.arguments["format"] == "fahrenheit" else 24
+    await params.result_callback(
+        {
+            "conditions": "nice",
+            "temperature": temperature,
+            "format": params.arguments["format"],
+            "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"),
+        }
+    )
+
+
+async def fetch_restaurant_recommendation(params: FunctionCallParams):
+    await params.result_callback({"name": "The Golden Dragon"})
+
+
+async def end_conversation(params: FunctionCallParams):
+    await params.result_callback({"success": True})
+    await params.llm.push_frame(EndTaskFrame(), FrameDirection.UPSTREAM)
+
+
+system_instruction = """
+You are a helpful assistant who can answer questions and use tools.
+
+You have three tools available to you:
+1. get_current_weather: Use this tool to get the current weather in a specific location.
+2. get_restaurant_recommendation: Use this tool to get a restaurant recommendation in a specific location.
+3. end_conversation: Use this tool to gracefully end the conversation.
+
+After you've responded to the user three times, do two things, in order:
+1. Politely let them know that that's all the time you have today and say goodbye.
+2. *WITHOUT WAITING FOR THE USER TO RESPOND*, call the end_conversation tool to gracefully end the conversation.
+"""
+
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        # set stop_secs to something roughly similar to the internal setting
+        # of the Multimodal Live api, just to align events. This doesn't really
+        # matter because we can only use the Multimodal Live API's phrase
+        # endpointing, for now.
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        # set stop_secs to something roughly similar to the internal setting
+        # of the Multimodal Live api, just to align events. This doesn't really
+        # matter because we can only use the Multimodal Live API's phrase
+        # endpointing, for now.
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        # set stop_secs to something roughly similar to the internal setting
+        # of the Multimodal Live api, just to align events. This doesn't really
+        # matter because we can only use the Multimodal Live API's phrase
+        # endpointing, for now.
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    weather_function = FunctionSchema(
+        name="get_current_weather",
+        description="Get the current weather",
+        properties={
+            "location": {
+                "type": "string",
+                "description": "The city and state, e.g. San Francisco, CA",
+            },
+            "format": {
+                "type": "string",
+                "enum": ["celsius", "fahrenheit"],
+                "description": "The temperature unit to use. Infer this from the user's location.",
+            },
+        },
+        required=["location", "format"],
+    )
+    restaurant_function = FunctionSchema(
+        name="get_restaurant_recommendation",
+        description="Get a restaurant recommendation",
+        properties={
+            "location": {
+                "type": "string",
+                "description": "The city and state, e.g. San Francisco, CA",
+            },
+        },
+        required=["location"],
+    )
+    end_conversation_function = FunctionSchema(
+        name="end_conversation",
+        description="Gracefully end the conversation",
+        properties={},
+        required=[],
+    )
+    search_tool = {"google_search": {}}
+    tools = ToolsSchema(
+        standard_tools=[weather_function, restaurant_function, end_conversation_function],
+        custom_tools={AdapterType.GEMINI: [search_tool]},
+    )
+
+    llm = GeminiLiveLLMService(
+        api_key=os.getenv("GOOGLE_API_KEY"),
+        system_instruction=system_instruction,
+        tools=tools,
+    )
+
+    llm.register_function("get_current_weather", fetch_weather_from_api)
+    llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation)
+    llm.register_function("end_conversation", end_conversation)
+
+    context = LLMContext(
+        [{"role": "user", "content": "Say hello."}],
+    )
+    context_aggregator = LLMContextAggregatorPair(context)
+
+    pipeline = Pipeline(
+        [
+            transport.input(),
+            context_aggregator.user(),
+            llm,
+            transport.output(),
+            context_aggregator.assistant(),
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+        # Kick off the conversation.
+        await task.queue_frames([LLMRunFrame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/27-simli-layer.py
+++ b/examples/foundational/27-simli-layer.py
@@ -9,7 +9,6 @@ import os

 from dotenv import load_dotenv
 from loguru import logger
-from simli import SimliConfig

 from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
 from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
@@ -66,11 +65,12 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
-        voice_id="a167e0f3-df7e-4d52-a9c3-f949145efdab",
+        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",
    )

    simli_ai = SimliVideoService(
-        SimliConfig(os.getenv("SIMLI_API_KEY"), os.getenv("SIMLI_FACE_ID")),
+        api_key=os.getenv("SIMLI_API_KEY"),
+        face_id="cace3ef7-a4c4-425d-a8cf-a5358eb0c427",
    )

    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o-mini")
--- a/examples/foundational/38b-smart-turn-local.py
+++ b/examples/foundational/38b-smart-turn-local.py
@@ -20,6 +20,7 @@ from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIObserver, RTVIProcessor
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
 from pipecat.services.cartesia.tts import CartesiaTTSService
@@ -78,9 +79,12 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    context = LLMContext(messages)
    context_aggregator = LLMContextAggregatorPair(context)

+    rtvi = RTVIProcessor()
+
    pipeline = Pipeline(
        [
            transport.input(),  # Transport user input
+            rtvi,
            stt,
            context_aggregator.user(),  # User responses
            llm,  # LLM
@@ -96,15 +100,20 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
+        observers=[RTVIObserver(rtvi)],
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

+    @rtvi.event_handler("on_client_ready")
+    async def on_client_ready(rtvi):
+        await rtvi.set_bot_ready()
+        # Kick off the conversation
+        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+        await task.queue_frames([LLMRunFrame()])
+
    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info(f"Client connected")
-        # Kick off the conversation.
-        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
-        await task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
--- a/examples/foundational/39-mcp-stdio.py
+++ b/examples/foundational/39-mcp-stdio.py
@@ -6,6 +6,7 @@

 import asyncio
 import io
+import json
 import os
 import re
 import shutil
@@ -63,10 +64,12 @@ class UrlToImageProcessor(FrameProcessor):
            await self.push_frame(frame, direction)

    def extract_url(self, text: str):
-        pattern = r"!\[[^\]]*\]\((https?://[^)]+\.(png|jpg|jpeg|PNG|JPG|JPEG))\)"
-        match = re.search(pattern, text)
-        if match:
-            return match.group(1)
+        data = json.loads(text)
+        if "artObject" in data:
+            return data["artObject"]["webImage"]["url"]
+        if "artworks" in data and len(data["artworks"]):
+            return data["artworks"][0]["webImage"]["url"]
+
        return None

    async def run_image_process(self, image_url: str):
@@ -130,9 +133,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            mcp = MCPClient(
                server_params=StdioServerParameters(
                    command=shutil.which("npx"),
-                    args=["-y", "@programcomputer/nasa-mcp-server@latest"],
-                    # https://api.nasa.gov
-                    env={"NASA_API_KEY": os.getenv("NASA_API_KEY")},
+                    # https://github.com/r-huijts/rijksmuseum-mcp
+                    args=["-y", "mcp-server-rijksmuseum"],
+                    env={"RIJKSMUSEUM_API_KEY": os.getenv("RIJKSMUSEUM_API_KEY")},
                )
            )
        except Exception as e:
@@ -141,15 +144,20 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

        mcp_image = UrlToImageProcessor(aiohttp_session=session)

-        tools = await mcp.register_tools(llm)
+        tools = {}
+        try:
+            tools = await mcp.register_tools(llm)
+        except Exception as e:
+            logger.error(f"error registering tools")
+            logger.exception("error trace:")

        system = f"""
        You are a helpful LLM in a WebRTC call.
        Your goal is to demonstrate your capabilities in a succinct way.
-        You have access to a number of tools provided by NASA MCP. Use any and all tools to help users.
-        When asked for the astronomy picture of the day, PASS in NO date to the API.
-        This ensures we get the latest picture available. If as specific date is asked for, you
-        can pass in that date to the API.
+        You have access to tools to search the Rijksmuseum collection.
+        Offer, for example, to show the earliest Rembrandt work from the museum. Use the `search_artwork` tool.
+        The tool may respond with a JSON object with an `artworks` array. Choose the art from that array.
+        Once the tool has responded, tell the user the title and use the `open_image_in_browser` tool.
        Your output will be converted to audio so don't include special characters in your answers.
        Respond to what the user said in a creative and helpful way.
        Don't overexplain what you are doing.
@@ -206,6 +214,13 @@ async def bot(runner_args: RunnerArguments):


 if __name__ == "__main__":
+    if not os.getenv("RIJKSMUSEUM_API_KEY"):
+        logger.error(
+            f"Please set RIJKSMUSEUM_API_KEY environment variable for this example. See https://github.com/r-huijts/rijksmuseum-mcp and https://www.rijksmuseum.nl/en/register?redirectUrl=https://www.https://www.rijksmuseum.nl/en/rijksstudio/my/profile"
+        )
+        import sys
+
+        sys.exit(1)
    from pipecat.runner.run import main

    main()
--- a/examples/foundational/39a-mcp-run-sse.py
+++ b/examples/foundational/39a-mcp-run-sse.py
@@ -79,7 +79,12 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        logger.error(f"error setting up mcp")
        logger.exception("error trace:")

-    tools = await mcp.register_tools(llm)
+    tools = {}
+    try:
+        tools = await mcp.register_tools(llm)
+    except Exception as e:
+        logger.error(f"error registering tools")
+        logger.exception("error trace:")

    system = f"""
    You are a helpful LLM in a WebRTC call.
@@ -141,6 +146,14 @@ async def bot(runner_args: RunnerArguments):


 if __name__ == "__main__":
+    if not os.getenv("MCP_RUN_SSE_URL"):
+        logger.error(
+            f"Please set MCP_RUN_SSE_URL environment variable for this example. See https://mcp.run"
+        )
+        import sys
+
+        sys.exit(1)
+
    from pipecat.runner.run import main

    main()
--- a/examples/foundational/39b-multiple-mcp.py
+++ b/examples/foundational/39b-multiple-mcp.py
@@ -132,9 +132,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        system = f"""
        You are a helpful LLM in a WebRTC call.
        Your goal is to demonstrate your capabilities in a succinct way.
-        You have access to a number of tools provided by NASA MCP. Use any and all tools to help users.
-        When asked for today's date, use 'https://www.datetoday.net/'.
-        When asked for the astronomy picture of the day, use 'https://www.datetoday.net/', to get today's date.
+        You have access to tools to search the Rijksmuseum collection.
+        Offer, for example, to show the earliest Rembrandt work from the museum. Use the `search_artwork` tool.
+        The tool may respond with a JSON object with an `artworks` array. Choose the art from that array.
+        Once the tool has responded, tell the user the title and use the `open_image_in_browser` tool.
        Your output will be converted to audio so don't include special characters in your answers.
        Respond to what the user said in a creative and helpful way.
        Don't overexplain what you are doing.
@@ -147,13 +148,13 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            mcp = MCPClient(
                server_params=StdioServerParameters(
                    command=shutil.which("npx"),
-                    args=["-y", "@programcomputer/nasa-mcp-server@latest"],
-                    # https://api.nasa.gov
-                    env={"NASA_API_KEY": os.getenv("NASA_API_KEY")},
+                    # https://github.com/r-huijts/rijksmuseum-mcp
+                    args=["-y", "mcp-server-error setting up mcp"],
+                    env={"RIJKSMUSEUM_API_KEY": os.getenv("RIJKSMUSEUM_API_KEY")},
                )
            )
        except Exception as e:
-            logger.error(f"error setting up nasa mcp")
+            logger.error(f"error setting up rijksmuseum mcp")
            logger.exception("error trace:")
        try:
            # https://docs.mcp.run/integrating/tutorials/mcp-run-sse-openai-agents/
@@ -164,8 +165,14 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            logger.error(f"error setting up mcp.run")
            logger.exception("error trace:")

-        tools = await mcp.register_tools(llm)
-        run_tools = await mcp_run.register_tools(llm)
+        tools = {}
+        run_tools = {}
+        try:
+            tools = await mcp.register_tools(llm)
+            run_tools = await mcp_run.register_tools(llm)
+        except Exception as e:
+            logger.error(f"error registering tools")
+            logger.exception("error trace:")

        all_standard_tools = run_tools.standard_tools + tools.standard_tools
        all_tools = ToolsSchema(standard_tools=all_standard_tools)
@@ -219,6 +226,14 @@ async def bot(runner_args: RunnerArguments):


 if __name__ == "__main__":
+    if not os.getenv("RIJKSMUSEUM_API_KEY") or not os.getenv("MCP_RUN_SSE_URL"):
+        logger.error(
+            f"Please set RIJKSMUSEUM_API_KEY and MCP_RUN_SSE_URL environment variables. See https://github.com/r-huijts/rijksmuseum-mcp and https://mcp.run"
+        )
+        import sys
+
+        sys.exit(1)
+
    from pipecat.runner.run import main

    main()
--- a/examples/foundational/39c-mcp-run-http.py
+++ b/examples/foundational/39c-mcp-run-http.py
@@ -85,7 +85,12 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        logger.error(f"error setting up mcp")
        logger.exception("error trace:")

-    tools = await mcp.register_tools(llm)
+    tools = {}
+    try:
+        tools = await mcp.register_tools(llm)
+    except Exception as e:
+        logger.error(f"error registering tools")
+        logger.exception("error trace:")

    system = f"""
    You are a helpful LLM in a WebRTC call.
@@ -145,6 +150,14 @@ async def bot(runner_args: RunnerArguments):


 if __name__ == "__main__":
+    if not os.getenv("GITHUB_PERSONAL_ACCESS_TOKEN"):
+        logger.error(
+            f"Please set GITHUB_PERSONAL_ACCESS_TOKEN environment variable for this example."
+        )
+        import sys
+
+        sys.exit(1)
+
    from pipecat.runner.run import main

    main()
--- a/examples/foundational/39d-mcp-run-http-gemini-live.py
+++ b/examples/foundational/39d-mcp-run-http-gemini-live.py
@@ -0,0 +1,165 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+from mcp.client.session_group import StreamableHttpParameters
+
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import LLMRunFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import NOT_GIVEN, LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.cartesia.tts import CartesiaTTSService
+from pipecat.services.deepgram.stt import DeepgramSTTService
+from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
+from pipecat.services.mcp_service import MCPClient
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+
+    tts = CartesiaTTSService(
+        api_key=os.getenv("CARTESIA_API_KEY"),
+        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
+    )
+
+    try:
+        # Github MCP docs: https://github.com/github/github-mcp-server
+        # Enable Github Copilot on your GitHub account. Free tier is ok. (https://github.com/settings/copilot)
+        # Generate a personal access token. It must be a Fine-grained token, classic tokens are not supported. (https://github.com/settings/personal-access-tokens)
+        # Set permissions you want to use (eg. "all repositories", "profile: read/write", etc)
+        mcp = MCPClient(
+            server_params=StreamableHttpParameters(
+                url="https://api.githubcopilot.com/mcp/",
+                headers={"Authorization": f"Bearer {os.getenv('GITHUB_PERSONAL_ACCESS_TOKEN')}"},
+            )
+        )
+    except Exception as e:
+        logger.error(f"error setting up mcp")
+        logger.exception("error trace:")
+
+    tools = {}
+    try:
+        tools = await mcp.get_tools_schema()
+    except Exception as e:
+        logger.error(f"error registering tools")
+        logger.exception("error trace:")
+
+    system = f"""
+    You are a helpful LLM in a WebRTC call.
+    Your goal is to answer questions about the user's GitHub repositories and account.
+    You have access to a number of tools provided by Github. Use any and all tools to help users.
+    Your output will be converted to audio so don't include special characters in your answers.
+    Don't overexplain what you are doing.
+    Just respond with short sentences when you are carrying out tool calls.
+    """
+
+    llm = GeminiLiveLLMService(
+        api_key=os.getenv("GOOGLE_API_KEY"),
+        system_instruction=system,
+        tools=tools,
+    )
+
+    await mcp.register_tools_schema(tools, llm)
+
+    context = LLMContext([{"role": "user", "content": "Please introduce yourself."}])
+    context_aggregator = LLMContextAggregatorPair(context)
+
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            context_aggregator.user(),  # User spoken responses
+            llm,  # LLM
+            transport.output(),  # Transport bot output
+            context_aggregator.assistant(),  # Assistant spoken responses and tool context
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected: {client}")
+        # Kick off the conversation.
+        await task.queue_frames([LLMRunFrame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    if not os.getenv("GITHUB_PERSONAL_ACCESS_TOKEN"):
+        logger.error(
+            f"Please set GITHUB_PERSONAL_ACCESS_TOKEN environment variable for this example."
+        )
+        import sys
+
+        sys.exit(1)
+
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/40-aws-nova-sonic.py
+++ b/examples/foundational/40-aws-nova-sonic.py
@@ -18,10 +18,11 @@ from pipecat.frames.frames import LLMRunFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.aws_nova_sonic import AWSNovaSonicLLMService
+from pipecat.services.aws.nova_sonic.llm import AWSNovaSonicLLMService
 from pipecat.services.llm_service import FunctionCallParams
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
@@ -119,9 +120,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    llm.register_function("get_current_weather", fetch_weather_from_api)

    # Set up context and context management.
-    # AWSNovaSonicService will adapt OpenAI LLM context objects with standard message format to
-    # what's expected by Nova Sonic.
-    context = OpenAILLMContext(
+    context = LLMContext(
        messages=[
            {"role": "system", "content": f"{system_instruction}"},
            {
@@ -131,7 +130,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        ],
        tools=tools,
    )
-    context_aggregator = llm.create_context_aggregator(context)
+    context_aggregator = LLMContextAggregatorPair(context)

    # Build the pipeline
    pipeline = Pipeline(
--- a/examples/foundational/46-video-processing.py
+++ b/examples/foundational/46-video-processing.py
@@ -15,12 +15,14 @@ from pipecat.frames.frames import Frame, InputImageRawFrame, LLMRunFrame, Output
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 from pipecat.processors.frameworks.rtvi import RTVIObserver, RTVIProcessor
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.gemini_multimodal_live import GeminiMultimodalLiveLLMService
+from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
 from pipecat.transports.base_transport import TransportParams
 from pipecat.transports.daily.transport import DailyParams, DailyTransport

@@ -94,7 +96,7 @@ Respond to what the user said in a creative and helpful way. Keep your responses


 async def run_bot(pipecat_transport):
-    llm = GeminiMultimodalLiveLLMService(
+    llm = GeminiLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        voice_id="Puck",  # Aoede, Charon, Fenrir, Kore, Puck
        transcribe_user_audio=True,
@@ -108,8 +110,8 @@ async def run_bot(pipecat_transport):
        }
    ]

-    context = OpenAILLMContext(messages)
-    context_aggregator = llm.create_context_aggregator(context)
+    context = LLMContext(messages)
+    context_aggregator = LLMContextAggregatorPair(context)

    # RTVI events for Pipecat client UI
    rtvi = RTVIProcessor()
--- a/examples/foundational/47-sentry-metrics.py
+++ b/examples/foundational/47-sentry-metrics.py
@@ -0,0 +1,142 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import os
+
+import sentry_sdk
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import LLMRunFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.processors.metrics.sentry import SentryMetrics
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.cartesia.tts import CartesiaTTSService
+from pipecat.services.deepgram.stt import DeepgramSTTService
+from pipecat.services.openai.llm import OpenAILLMService
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    # Initialize Sentry
+    sentry_sdk.init(
+        dsn=os.getenv("SENTRY_DSN"),
+        traces_sample_rate=1.0,
+    )
+
+    stt = DeepgramSTTService(
+        api_key=os.getenv("DEEPGRAM_API_KEY"),
+        metrics=SentryMetrics(),
+    )
+
+    tts = CartesiaTTSService(
+        api_key=os.getenv("CARTESIA_API_KEY"),
+        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
+        metrics=SentryMetrics(),
+    )
+
+    llm = OpenAILLMService(
+        api_key=os.getenv("OPENAI_API_KEY"),
+        metrics=SentryMetrics(),
+    )
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    context = LLMContext(messages)
+    context_aggregator = LLMContextAggregatorPair(context)
+
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            stt,
+            context_aggregator.user(),  # User responses
+            llm,  # LLM
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+            context_aggregator.assistant(),  # Assistant spoken responses
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+        # Kick off the conversation.
+        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+        await task.queue_frames([LLMRunFrame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/48-service-switcher.py
+++ b/examples/foundational/48-service-switcher.py
@@ -0,0 +1,196 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.adapters.schemas.function_schema import FunctionSchema
+from pipecat.adapters.schemas.tools_schema import ToolsSchema
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import LLMRunFrame, ManuallySwitchServiceFrame
+from pipecat.pipeline.llm_switcher import LLMSwitcher
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.service_switcher import ServiceSwitcher, ServiceSwitcherStrategyManual
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.cartesia.stt import CartesiaSTTService
+from pipecat.services.cartesia.tts import CartesiaTTSService
+from pipecat.services.deepgram.stt import DeepgramSTTService
+from pipecat.services.deepgram.tts import DeepgramTTSService
+from pipecat.services.google.llm import GoogleLLMService
+from pipecat.services.llm_service import FunctionCallParams
+from pipecat.services.openai.llm import OpenAILLMService
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+
+# "Classic" function
+async def fetch_weather_from_api(params: FunctionCallParams):
+    await params.result_callback({"conditions": "nice", "temperature": "75"})
+
+
+# "Direct" function
+async def get_restaurant_recommendation(params: FunctionCallParams, location: str):
+    """
+    Get a restaurant recommendation.
+
+    Args:
+        location (str): The city and state, e.g. "San Francisco, CA".
+    """
+    await params.result_callback({"name": "The Golden Dragon"})
+
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    weather_function = FunctionSchema(
+        name="get_current_weather",
+        description="Get the current weather",
+        properties={
+            "location": {
+                "type": "string",
+                "description": "The city and state, e.g. San Francisco, CA",
+            },
+            "format": {
+                "type": "string",
+                "enum": ["celsius", "fahrenheit"],
+                "description": "The temperature unit to use. Infer this from the user's location.",
+            },
+        },
+        required=["location", "format"],
+    )
+
+    stt_cartesia = CartesiaSTTService(api_key=os.getenv("CARTESIA_API_KEY"))
+    stt_deepgram = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+    stt_switcher = ServiceSwitcher(
+        services=[stt_cartesia, stt_deepgram], strategy_type=ServiceSwitcherStrategyManual
+    )
+
+    tts_cartesia = CartesiaTTSService(
+        api_key=os.getenv("CARTESIA_API_KEY"),
+        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",
+    )
+    tts_deepgram = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+    tts_switcher = ServiceSwitcher(
+        services=[tts_cartesia, tts_deepgram], strategy_type=ServiceSwitcherStrategyManual
+    )
+
+    llm_openai = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
+    llm_google = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"))
+    llm_switcher = LLMSwitcher(
+        llms=[llm_openai, llm_google], strategy_type=ServiceSwitcherStrategyManual
+    )
+    # Register a "classic" function
+    llm_switcher.register_function("get_current_weather", fetch_weather_from_api)
+    # Register a "direct" function
+    llm_switcher.register_direct_function(get_restaurant_recommendation)
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+    tools = ToolsSchema(standard_tools=[weather_function, get_restaurant_recommendation])
+
+    context = LLMContext(messages, tools)
+    context_aggregator = LLMContextAggregatorPair(context)
+
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            stt_switcher,
+            context_aggregator.user(),  # User responses
+            llm_switcher,  # LLM
+            tts_switcher,  # TTS
+            transport.output(),  # Transport bot output
+            context_aggregator.assistant(),  # Assistant spoken responses
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+        # Kick off the conversation.
+        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+        await task.queue_frames([LLMRunFrame()])
+        await asyncio.sleep(15)
+        print(f"Switching to {stt_deepgram}")
+        await task.queue_frames([ManuallySwitchServiceFrame(service=stt_deepgram)])
+        await asyncio.sleep(15)
+        print(f"Switching to {llm_google}")
+        await task.queue_frames([ManuallySwitchServiceFrame(service=llm_google)])
+        await asyncio.sleep(15)
+        print(f"Switching to {tts_deepgram}")
+        await task.queue_frames([ManuallySwitchServiceFrame(service=tts_deepgram)])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/README.md
+++ b/examples/foundational/README.md
@@ -105,7 +105,7 @@ uv run 07-interruptible.py -t twilio -x NGROK_HOST_NAME
 ### Vision & Multimodal

 - **[12a-describe-video-gemini-flash.py](./12a-describe-video-gemini-flash.py)**: Bot describes user's video (Video input, Multimodal LLMs)
- **[26c-gemini-multimodal-live-video.py](./26c-gemini-multimodal-live-video.py)**: Gemini with video input (Streaming video, Function calls)
+- **[26c-gemini-live-video.py](./26c-gemini-live-video.py)**: Gemini with video input (Streaming video, Function calls)

 ### Voice & Language

--- a/examples/foundational/assets/cat.jpg
+++ b/examples/foundational/assets/cat.jpg
--- a/examples/foundational/assets/moondream.png
+++ b/examples/foundational/assets/moondream.png
--- a/examples/quickstart/README.md
+++ b/examples/quickstart/README.md
@@ -73,13 +73,13 @@ Transform your local bot into a production-ready service. Pipecat Cloud handles

 1. [Sign up for Pipecat Cloud](https://pipecat.daily.co/sign-up).

-2. Install the Pipecat Cloud CLI:
+2. Install the Pipecat CLI:

   ```bash
-   uv add pipecatcloud
+   uv tool install pipecat-ai-cli
   ```

-> 💡 Tip: You can run the `pipecatcloud` CLI using the `pcc` alias.
+> 💡 Tip: You can run the `pipecat` CLI using the `pc` alias.

 3. Set up Docker for building your bot image:

@@ -113,12 +113,22 @@ secret_set = "quickstart-secrets"

 > 💡 Tip: [Set up `image_credentials`](https://docs.pipecat.ai/deployment/pipecat-cloud/fundamentals/secrets#image-pull-secrets) in your TOML file for authenticated image pulls

+### Log in to Pipecat Cloud
+
+To start using the CLI, authenticate to Pipecat Cloud:
+
+```bash
+pipecat cloud auth login
+```
+
+You'll be presented with a link that you can click to authenticate your client.
+
 ### Configure secrets

 Upload your API keys to Pipecat Cloud's secure storage:

 ```bash
-uv run pcc secrets set quickstart-secrets --file .env
+pipecat cloud secrets set quickstart-secrets --file .env
 ```

 This creates a secret set called `quickstart-secrets` (matching your TOML file) and uploads all your API keys from `.env`.
@@ -128,13 +138,13 @@ This creates a secret set called `quickstart-secrets` (matching your TOML file)
 Build your Docker image and push to Docker Hub:

 ```bash
-uv run pcc docker build-push
+pipecat cloud docker build-push
 ```

 Deploy to Pipecat Cloud:

 ```bash
-uv run pcc deploy
+pipecat cloud deploy
 ```

 ### Connect to your agent
--- a/examples/quickstart/pcc-deploy.toml
+++ b/examples/quickstart/pcc-deploy.toml
@@ -1,6 +1,11 @@
 agent_name = "quickstart"
 image = "your_username/quickstart:0.1"
 secret_set = "quickstart-secrets"
+agent_profile = "agent-1x"
+
+# RECOMMENDED: Set an image pull secret:
+# https://docs.pipecat.ai/deployment/pipecat-cloud/fundamentals/secrets#image-pull-secrets
+# image_credentials = "your_image_pull_secret"

 [scaling]
 	min_agents = 1
--- a/examples/quickstart/pyproject.toml
+++ b/examples/quickstart/pyproject.toml
@@ -4,13 +4,14 @@ version = "0.1.0"
 description = "Quickstart example for building voice AI bots with Pipecat"
 requires-python = ">=3.10"
 dependencies = [
-    "pipecat-ai[webrtc,daily,silero,deepgram,openai,cartesia,local-smart-turn-v3,runner]>=0.0.85",
-    "pipecatcloud>=0.2.4"
+    "pipecat-ai[webrtc,daily,silero,deepgram,openai,cartesia,local-smart-turn-v3,runner]",
+    "pipecat-ai-cli"
 ]

 [dependency-groups]
 dev = [
-    "ruff~=0.12.1",
+    "pyright>=1.1.404,<2",
+    "ruff>=0.12.11,<1",
 ]

 [tool.ruff]
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,7 +34,7 @@ dependencies = [
    "pyloudnorm~=0.1.1",
    "resampy~=0.4.3",
    "soxr~=0.5.0",
-    "openai>=1.74.0,<=1.99.1",
+    "openai>=1.74.0,<3",
    # Pinning numba to resolve package dependencies
    "numba==0.61.2",
    "wait_for2>=0.4.1; python_version<'3.12'",
@@ -50,23 +50,24 @@ anthropic = [ "anthropic~=0.49.0" ]
 assemblyai = [ "pipecat-ai[websockets-base]" ]
 asyncai = [ "pipecat-ai[websockets-base]" ]
 aws = [ "aioboto3~=15.0.0", "pipecat-ai[websockets-base]" ]
-aws-nova-sonic = [ "aws_sdk_bedrock_runtime~=0.0.2; python_version>='3.12'" ]
+aws-nova-sonic = [ "aws_sdk_bedrock_runtime~=0.1.1; python_version>='3.12'" ]
 azure = [ "azure-cognitiveservices-speech~=1.42.0"]
 cartesia = [ "cartesia~=2.0.3", "pipecat-ai[websockets-base]" ]
 cerebras = []
 deepseek = []
-daily = [ "daily-python~=0.19.9" ]
+daily = [ "daily-python~=0.21.0" ]
 deepgram = [ "deepgram-sdk~=4.7.0" ]
 elevenlabs = [ "pipecat-ai[websockets-base]" ]
 fal = [ "fal-client~=0.5.9" ]
 fireworks = []
 fish = [ "ormsgpack~=1.7.0", "pipecat-ai[websockets-base]" ]
 gladia = [ "pipecat-ai[websockets-base]" ]
-google = [ "google-cloud-speech~=2.32.0", "google-cloud-texttospeech~=2.26.0", "google-genai~=1.24.0", "pipecat-ai[websockets-base]" ]
+google = [ "google-cloud-speech>=2.33.0,<3", "google-cloud-texttospeech>=2.31.0,<3", "google-genai>=1.41.0,<2", "pipecat-ai[websockets-base]" ]
 grok = []
 groq = [ "groq~=0.23.0" ]
 gstreamer = [ "pygobject~=3.50.0" ]
 heygen = [ "livekit>=1.0.13", "pipecat-ai[websockets-base]" ]
+hume = [ "hume>=0.11.2" ]
 inworld = []
 krisp = [ "pipecat-ai-krisp~=0.4.0" ]
 koala = [ "pvkoala~=2.0.3" ]
@@ -83,32 +84,32 @@ nim = []
 neuphonic = [ "pipecat-ai[websockets-base]" ]
 noisereduce = [ "noisereduce~=3.0.3" ]
 openai = [ "pipecat-ai[websockets-base]" ]
-openpipe = [ "openpipe~=4.50.0" ]
+openpipe = [ "openpipe>=4.50.0,<6" ]
 openrouter = []
 perplexity = []
 playht = [ "pipecat-ai[websockets-base]" ]
 qwen = []
 rime = [ "pipecat-ai[websockets-base]" ]
 riva = [ "nvidia-riva-client~=2.21.1" ]
-runner = [ "python-dotenv>=1.0.0,<2.0.0", "uvicorn>=0.32.0,<1.0.0", "fastapi>=0.115.6,<0.117.0", "pipecat-ai-small-webrtc-prebuilt>=1.0.0"]
+runner = [ "python-dotenv>=1.0.0,<2.0.0", "uvicorn>=0.32.0,<1.0.0", "fastapi>=0.115.6,<0.122.0", "pipecat-ai-small-webrtc-prebuilt>=1.0.0"]
 sambanova = []
-sarvam = [ "pipecat-ai[websockets-base]" ]
+sarvam = [ "sarvamai==0.1.21", "pipecat-ai[websockets-base]" ]
 sentry = [ "sentry-sdk>=2.28.0,<3" ]
 local-smart-turn = [ "coremltools>=8.0", "transformers", "torch>=2.5.0,<3", "torchaudio>=2.5.0,<3" ]
 local-smart-turn-v3 = [ "transformers", "onnxruntime>=1.20.1,<2" ]
 remote-smart-turn = []
 silero = [ "onnxruntime>=1.20.1,<2" ]
-simli = [ "simli-ai~=0.1.10"]
+simli = [ "simli-ai~=0.1.25"]
 soniox = [ "pipecat-ai[websockets-base]" ]
-soundfile = [ "soundfile~=0.13.0" ]
-speechmatics = [ "speechmatics-rt>=0.4.0" ]
+soundfile = [ "soundfile~=0.13.1" ]
+speechmatics = [ "speechmatics-rt>=0.5.0" ]
 strands = [ "strands-agents>=1.9.1,<2" ]
 tavus=[]
 together = []
 tracing = [ "opentelemetry-sdk>=1.33.0", "opentelemetry-api>=1.33.0", "opentelemetry-instrumentation>=0.54b0" ]
 ultravox = [ "transformers>=4.48.0", "vllm>=0.9.0" ]
 webrtc = [ "aiortc>=1.13.0,<2", "opencv-python>=4.11.0.86,<5" ]
-websocket = [ "pipecat-ai[websockets-base]", "fastapi>=0.115.6,<0.117.0" ]
+websocket = [ "pipecat-ai[websockets-base]", "fastapi>=0.115.6,<0.122.0" ]
 websockets-base = [ "websockets>=13.1,<16.0" ]
 whisper = [ "faster-whisper~=1.1.1" ]

--- a/scripts/evals/eval.py
+++ b/scripts/evals/eval.py
@@ -10,9 +10,10 @@ import os
 import re
 import time
 import wave
+from dataclasses import dataclass
 from datetime import datetime
 from pathlib import Path
-from typing import List, Optional, Tuple
+from typing import Any, List, Optional, Tuple

 import aiofiles
 from deepgram import LiveOptions
@@ -34,7 +35,8 @@ from pipecat.frames.frames import EndTaskFrame, LLMRunFrame, OutputImageRawFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
 from pipecat.processors.frame_processor import FrameDirection
 from pipecat.runner.types import RunnerArguments
@@ -52,6 +54,14 @@ EVAL_TIMEOUT_SECS = 120
 EvalPrompt = str | Tuple[str, ImageFile]


+@dataclass
+class EvalConfig:
+    prompt: EvalPrompt
+    eval: str
+    eval_speaks_first: bool = False
+    runner_args_body: Optional[Any] = None
+
+
 class EvalRunner:
    def __init__(
        self,
@@ -92,9 +102,7 @@ class EvalRunner:
    async def run_eval(
        self,
        example_file: str,
-        prompt: EvalPrompt,
-        eval: str,
-        user_speaks_first: bool = False,
+        eval_config: EvalConfig,
    ):
        if not re.match(self._pattern, example_file):
            return
@@ -111,10 +119,8 @@ class EvalRunner:

        try:
            tasks = [
-                asyncio.create_task(run_example_pipeline(script_path)),
-                asyncio.create_task(
-                    run_eval_pipeline(self, example_file, prompt, eval, user_speaks_first)
-                ),
+                asyncio.create_task(run_example_pipeline(script_path, eval_config)),
+                asyncio.create_task(run_eval_pipeline(self, example_file, eval_config)),
            ]
            _, pending = await asyncio.wait(tasks, timeout=EVAL_TIMEOUT_SECS)
            if pending:
@@ -176,7 +182,7 @@ class EvalRunner:
        return os.path.join(self._recordings_dir, f"{base_name}.wav")


-async def run_example_pipeline(script_path: Path):
+async def run_example_pipeline(script_path: Path, eval_config: EvalConfig):
    room_url = os.getenv("DAILY_SAMPLE_ROOM_URL")

    module = load_module_from_path(script_path)
@@ -195,6 +201,7 @@ async def run_example_pipeline(script_path: Path):

    runner_args = RunnerArguments()
    runner_args.pipeline_idle_timeout_secs = PIPELINE_IDLE_TIMEOUT_SECS
+    runner_args.body = eval_config.runner_args_body

    await module.run_bot(transport, runner_args)

@@ -202,9 +209,7 @@ async def run_example_pipeline(script_path: Path):
 async def run_eval_pipeline(
    eval_runner: EvalRunner,
    example_file: str,
-    prompt: EvalPrompt,
-    eval: str,
-    user_speaks_first: bool = False,
+    eval_config: EvalConfig,
 ):
    logger.info(f"Starting eval bot")

@@ -239,10 +244,10 @@ async def run_eval_pipeline(

    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

-    llm.register_function("assert_eval", eval_runner.assert_eval)
+    llm.register_function("eval_function", eval_runner.assert_eval)

    eval_function = FunctionSchema(
-        name="assert_eval",
+        name="eval_function",
        description="Called when the user answers a question.",
        properties={
            "result": {
@@ -261,20 +266,21 @@ async def run_eval_pipeline(
    # Load example prompt depending on image.
    example_prompt = ""
    example_image: Optional[ImageFile] = None
-    if isinstance(prompt, str):
-        example_prompt = prompt
-    elif isinstance(prompt, tuple):
-        example_prompt, example_image = prompt
+    if isinstance(eval_config.prompt, str):
+        example_prompt = eval_config.prompt
+    elif isinstance(eval_config.prompt, tuple):
+        example_prompt, example_image = eval_config.prompt

-    eval_prompt = f"The answer is correct if it matches: {eval}."
    common_system_prompt = (
-        "The user might say things other than the answer and that's allowed. "
-        f"You should only call the eval function with your assessment when the user actually answers the question. {eval_prompt}"
+        "You should only call the eval function if:\n"
+        "- The user explicitly attempts to answer the question, AND\n"
+        f"- Their answer can be cleanly evaluated using: {eval_config.eval}\n"
+        "Ignore greetings, comments, non-answers, or requests for clarification."
    )
-    if user_speaks_first:
-        system_prompt = f"You are an LLM eval, be extremly brief. You will start the conversation by saying: '{example_prompt}'. {common_system_prompt}"
+    if eval_config.eval_speaks_first:
+        system_prompt = f"You are an evaluation agent, be extremly brief. You will start the conversation by saying: '{example_prompt}'. {common_system_prompt}"
    else:
-        system_prompt = f"You are an LLM eval, be extremly brief. Your goal is to first ask one question: {example_prompt}. {common_system_prompt}"
+        system_prompt = f"You are an evaluation agent, be extremly brief. First, ask one question: {example_prompt}. {common_system_prompt}"

    messages = [
        {
@@ -283,8 +289,8 @@ async def run_eval_pipeline(
        },
    ]

-    context = OpenAILLMContext(messages, tools)
-    context_aggregator = llm.create_context_aggregator(context)
+    context = LLMContext(messages, tools)
+    context_aggregator = LLMContextAggregatorPair(context)

    audio_buffer = AudioBufferProcessor()

@@ -329,9 +335,9 @@ async def run_eval_pipeline(

        # Default behavior is for the bot to speak first
        # If the eval bot speaks first, we append the prompt to the messages
-        if user_speaks_first:
+        if eval_config.eval_speaks_first:
            messages.append(
-                {"role": "user", "content": f"Start by saying this exactly: '{prompt}'"}
+                {"role": "user", "content": f"Start by saying this exactly: '{eval_config.prompt}'"}
            )
            await task.queue_frames([LLMRunFrame()])

--- a/scripts/evals/run-release-evals.py
+++ b/scripts/evals/run-release-evals.py
@@ -11,7 +11,7 @@ from datetime import datetime, timezone
 from pathlib import Path

 from dotenv import load_dotenv
-from eval import EvalRunner
+from eval import EvalConfig, EvalRunner
 from loguru import logger
 from PIL import Image
 from utils import check_env_variables
@@ -24,179 +24,184 @@ ASSETS_DIR = SCRIPT_DIR / "assets"

 FOUNDATIONAL_DIR = SCRIPT_DIR.parent.parent / "examples" / "foundational"

-# Speaking order constants
-USER_SPEAKS_FIRST = True
-BOT_SPEAKS_FIRST = False
-
-# Math
-PROMPT_SIMPLE_MATH = "A simple math addition."
-EVAL_SIMPLE_MATH = "Correct math addition."
-
-# Weather
-PROMPT_WEATHER = "What's the weather in San Francisco?"
-EVAL_WEATHER = (
-    "Something specific about the current weather in San Francisco, including the degrees."
+EVAL_SIMPLE_MATH = EvalConfig(
+    prompt="A simple math addition.",
+    eval="The user answers the math addition correctly.",
 )

-# Online search
-PROMPT_ONLINE_SEARCH = "What's the date right now in London?"
-EVAL_ONLINE_SEARCH = f"Today is {datetime.now(timezone.utc).strftime('%B %d, %Y')}."
+EVAL_WEATHER = EvalConfig(
+    prompt="What's the weather in San Francisco?",
+    eval="The user says something specific about the current weather in San Francisco, including the degrees.",
+)

-# Switch language
-PROMPT_SWITCH_LANGUAGE = "Say something in Spanish."
-EVAL_SWITCH_LANGUAGE = "The user is now talking in Spanish."
+EVAL_ONLINE_SEARCH = EvalConfig(
+    prompt="What's the date right now in London?",
+    eval=f"The user says today is {datetime.now(timezone.utc).strftime('%B %d, %Y')} in London.",
+)

-# Vision
-PROMPT_VISION = ("What do you see?", Image.open(ASSETS_DIR / "cat.jpg"))
-EVAL_VISION = "A cat description."
+EVAL_SWITCH_LANGUAGE = EvalConfig(
+    prompt="Say something in Spanish.",
+    eval="The user talks in Spanish.",
+)
+
+EVAL_VISION_CAMERA = EvalConfig(
+    prompt=("Briefly describe what you see.", Image.open(ASSETS_DIR / "cat.jpg")),
+    eval="The user provides a cat description.",
+)
+
+
+def EVAL_VISION_IMAGE(*, eval_speaks_first: bool = False):
+    return EvalConfig(
+        prompt="Briefly describe this image.",
+        eval="The user provides a cat description.",
+        eval_speaks_first=eval_speaks_first,
+        runner_args_body={
+            "image_path": ASSETS_DIR / "cat.jpg",
+            "question": "Briefly describe this image.",
+        },
+    )
+
+
+EVAL_VOICEMAIL = EvalConfig(
+    prompt="Please leave a message.",
+    eval="The user leaves a voicemail message.",
+    eval_speaks_first=True,
+)
+
+EVAL_CONVERSATION = EvalConfig(
+    prompt="Hello, this is Mark.",
+    eval="The user replies with a greeting.",
+    eval_speaks_first=True,
+)

-# Voicemail
-PROMPT_VOICEMAIL = "Please leave a message after the beep."
-EVAL_VOICEMAIL = "Assess the conversation and determine if it is a voicemail."
-PROMPT_CONVERSATION = "Hello, this is Mark."
-EVAL_CONVERSATION = "A start of a conversation, not a voicemail."

 TESTS_07 = [
    # 07 series
-    ("07-interruptible.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07-interruptible-cartesia-http.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07a-interruptible-speechmatics.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07aa-interruptible-soniox.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07ab-interruptible-inworld-http.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07ac-interruptible-asyncai.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07ac-interruptible-asyncai-http.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07b-interruptible-langchain.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07c-interruptible-deepgram.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07d-interruptible-elevenlabs.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    (
-        "07d-interruptible-elevenlabs-http.py",
-        PROMPT_SIMPLE_MATH,
-        EVAL_SIMPLE_MATH,
-        BOT_SPEAKS_FIRST,
-    ),
-    ("07e-interruptible-playht.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07e-interruptible-playht-http.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07f-interruptible-azure.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07g-interruptible-openai.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07h-interruptible-openpipe.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07j-interruptible-gladia.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07k-interruptible-lmnt.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07l-interruptible-groq.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07m-interruptible-aws.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07m-interruptible-aws-strands.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    ("07n-interruptible-gemini.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07n-interruptible-google.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07o-interruptible-assemblyai.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07q-interruptible-rime.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07q-interruptible-rime-http.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07r-interruptible-riva-nim.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    (
-        "07s-interruptible-google-audio-in.py",
-        PROMPT_SIMPLE_MATH,
-        EVAL_SIMPLE_MATH,
-        BOT_SPEAKS_FIRST,
-    ),
-    ("07t-interruptible-fish.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07v-interruptible-neuphonic.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07v-interruptible-neuphonic-http.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07w-interruptible-fal.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07y-interruptible-minimax.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    ("07z-interruptible-sarvam.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
+    ("07-interruptible.py", EVAL_SIMPLE_MATH),
+    ("07-interruptible-cartesia-http.py", EVAL_SIMPLE_MATH),
+    ("07a-interruptible-speechmatics.py", EVAL_SIMPLE_MATH),
+    ("07aa-interruptible-soniox.py", EVAL_SIMPLE_MATH),
+    ("07ab-interruptible-inworld-http.py", EVAL_SIMPLE_MATH),
+    ("07ac-interruptible-asyncai.py", EVAL_SIMPLE_MATH),
+    ("07ac-interruptible-asyncai-http.py", EVAL_SIMPLE_MATH),
+    ("07b-interruptible-langchain.py", EVAL_SIMPLE_MATH),
+    ("07c-interruptible-deepgram.py", EVAL_SIMPLE_MATH),
+    ("07c-interruptible-deepgram-flux.py", EVAL_SIMPLE_MATH),
+    ("07c-interruptible-deepgram-http.py", EVAL_SIMPLE_MATH),
+    ("07d-interruptible-elevenlabs.py", EVAL_SIMPLE_MATH),
+    ("07d-interruptible-elevenlabs-http.py", EVAL_SIMPLE_MATH),
+    ("07f-interruptible-azure.py", EVAL_SIMPLE_MATH),
+    ("07g-interruptible-openai.py", EVAL_SIMPLE_MATH),
+    ("07h-interruptible-openpipe.py", EVAL_SIMPLE_MATH),
+    ("07j-interruptible-gladia.py", EVAL_SIMPLE_MATH),
+    ("07k-interruptible-lmnt.py", EVAL_SIMPLE_MATH),
+    ("07l-interruptible-groq.py", EVAL_SIMPLE_MATH),
+    ("07m-interruptible-aws.py", EVAL_SIMPLE_MATH),
+    ("07m-interruptible-aws-strands.py", EVAL_WEATHER),
+    ("07n-interruptible-gemini.py", EVAL_SIMPLE_MATH),
+    ("07n-interruptible-google.py", EVAL_SIMPLE_MATH),
+    ("07o-interruptible-assemblyai.py", EVAL_SIMPLE_MATH),
+    ("07q-interruptible-rime.py", EVAL_SIMPLE_MATH),
+    ("07q-interruptible-rime-http.py", EVAL_SIMPLE_MATH),
+    ("07r-interruptible-riva-nim.py", EVAL_SIMPLE_MATH),
+    ("07s-interruptible-google-audio-in.py", EVAL_SIMPLE_MATH),
+    ("07t-interruptible-fish.py", EVAL_SIMPLE_MATH),
+    ("07v-interruptible-neuphonic.py", EVAL_SIMPLE_MATH),
+    ("07v-interruptible-neuphonic-http.py", EVAL_SIMPLE_MATH),
+    ("07w-interruptible-fal.py", EVAL_SIMPLE_MATH),
+    ("07y-interruptible-minimax.py", EVAL_SIMPLE_MATH),
+    ("07z-interruptible-sarvam.py", EVAL_SIMPLE_MATH),
+    ("07ae-interruptible-hume.py", EVAL_SIMPLE_MATH),
    # Needs a local XTTS docker instance running.
-    # ("07i-interruptible-xtts.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
+    # ("07i-interruptible-xtts.py", EVAL_SIMPLE_MATH),
    # Needs a Krisp license.
-    # ("07p-interruptible-krisp.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
+    # ("07p-interruptible-krisp.py", EVAL_SIMPLE_MATH),
    # Needs GPU resources.
-    # ("07u-interruptible-ultravox.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
+    # ("07u-interruptible-ultravox.py", EVAL_SIMPLE_MATH),
 ]

 TESTS_12 = [
-    ("12-describe-video.py", PROMPT_VISION, EVAL_VISION, BOT_SPEAKS_FIRST),
-    ("12a-describe-video-gemini-flash.py", PROMPT_VISION, EVAL_VISION, BOT_SPEAKS_FIRST),
-    ("12b-describe-video-gpt-4o.py", PROMPT_VISION, EVAL_VISION, BOT_SPEAKS_FIRST),
-    ("12c-describe-video-anthropic.py", PROMPT_VISION, EVAL_VISION, BOT_SPEAKS_FIRST),
+    ("12-describe-image-openai.py", EVAL_VISION_IMAGE(eval_speaks_first=True)),
+    ("12a-describe-image-anthropic.py", EVAL_VISION_IMAGE(eval_speaks_first=True)),
+    ("12b-describe-image-aws.py", EVAL_VISION_IMAGE(eval_speaks_first=True)),
+    ("12c-describe-image-gemini-flash.py", EVAL_VISION_IMAGE(eval_speaks_first=True)),
+    ("12d-describe-image-moondream.py", EVAL_VISION_IMAGE()),
 ]

 TESTS_14 = [
-    ("14-function-calling.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    ("14a-function-calling-anthropic.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    ("14b-function-calling-anthropic-video.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    ("14d-function-calling-video.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    ("14e-function-calling-google.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    ("14f-function-calling-groq.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    ("14g-function-calling-grok.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    ("14h-function-calling-azure.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    ("14i-function-calling-fireworks.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    ("14j-function-calling-nim.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    ("14k-function-calling-cerebras.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    ("14m-function-calling-openrouter.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    ("14n-function-calling-perplexity.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    ("14p-function-calling-gemini-vertex-ai.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    ("14q-function-calling-qwen.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    ("14r-function-calling-aws.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    ("14v-function-calling-openai.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    ("14w-function-calling-mistral.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
+    ("14-function-calling.py", EVAL_WEATHER),
+    ("14a-function-calling-anthropic.py", EVAL_WEATHER),
+    ("14e-function-calling-google.py", EVAL_WEATHER),
+    ("14f-function-calling-groq.py", EVAL_WEATHER),
+    ("14g-function-calling-grok.py", EVAL_WEATHER),
+    ("14h-function-calling-azure.py", EVAL_WEATHER),
+    ("14i-function-calling-fireworks.py", EVAL_WEATHER),
+    ("14j-function-calling-nim.py", EVAL_WEATHER),
+    ("14k-function-calling-cerebras.py", EVAL_WEATHER),
+    ("14m-function-calling-openrouter.py", EVAL_WEATHER),
+    ("14n-function-calling-perplexity.py", EVAL_WEATHER),
+    ("14p-function-calling-gemini-vertex-ai.py", EVAL_WEATHER),
+    ("14q-function-calling-qwen.py", EVAL_WEATHER),
+    ("14r-function-calling-aws.py", EVAL_WEATHER),
+    ("14v-function-calling-openai.py", EVAL_WEATHER),
+    ("14w-function-calling-mistral.py", EVAL_WEATHER),
+    ("14x-function-calling-openpipe.py", EVAL_WEATHER),
+    # Video
+    ("14d-function-calling-anthropic-video.py", EVAL_VISION_CAMERA),
+    ("14d-function-calling-aws-video.py", EVAL_VISION_CAMERA),
+    ("14d-function-calling-gemini-flash-video.py", EVAL_VISION_CAMERA),
+    ("14d-function-calling-moondream-video.py", EVAL_VISION_CAMERA),
+    ("14d-function-calling-openai-video.py", EVAL_VISION_CAMERA),
    # Currently not working.
-    # ("14c-function-calling-together.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    # ("14l-function-calling-deepseek.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    # ("14o-function-calling-gemini-openai-format.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
+    # ("14c-function-calling-together.py", EVAL_WEATHER),
+    # ("14l-function-calling-deepseek.py", EVAL_WEATHER),
+    # ("14o-function-calling-gemini-openai-format.py", EVAL_WEATHER),
 ]

 TESTS_15 = [
-    ("15a-switch-languages.py", PROMPT_SWITCH_LANGUAGE, EVAL_SWITCH_LANGUAGE, BOT_SPEAKS_FIRST),
+    ("15a-switch-languages.py", EVAL_SWITCH_LANGUAGE),
 ]

 TESTS_19 = [
-    ("19-openai-realtime-beta.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    ("19a-azure-realtime-beta.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    ("19b-openai-realtime-text.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    ("19b-openai-realtime-beta-text.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
+    ("19-openai-realtime.py", EVAL_WEATHER),
+    ("19-openai-realtime-beta.py", EVAL_WEATHER),
+    # OpenAI Realtime not released on Azure yet
+    # ("19a-azure-realtime.py", EVAL_WEATHER),
+    ("19a-azure-realtime-beta.py", EVAL_WEATHER),
+    ("19b-openai-realtime-text.py", EVAL_WEATHER),
+    ("19b-openai-realtime-beta-text.py", EVAL_WEATHER),
 ]

 TESTS_21 = [
-    ("21a-tavus-video-service.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
+    ("21a-tavus-video-service.py", EVAL_SIMPLE_MATH),
 ]

 TESTS_26 = [
-    ("26-gemini-multimodal-live.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    (
-        "26a-gemini-multimodal-live-transcription.py",
-        PROMPT_SIMPLE_MATH,
-        EVAL_SIMPLE_MATH,
-        BOT_SPEAKS_FIRST,
-    ),
-    (
-        "26b-gemini-multimodal-live-function-calling.py",
-        PROMPT_WEATHER,
-        EVAL_WEATHER,
-        BOT_SPEAKS_FIRST,
-    ),
-    ("26c-gemini-multimodal-live-video.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    (
-        "26e-gemini-multimodal-google-search.py",
-        PROMPT_ONLINE_SEARCH,
-        EVAL_ONLINE_SEARCH,
-        BOT_SPEAKS_FIRST,
-    ),
+    ("26-gemini-live.py", EVAL_SIMPLE_MATH),
+    ("26a-gemini-live-transcription.py", EVAL_SIMPLE_MATH),
+    ("26b-gemini-live-function-calling.py", EVAL_WEATHER),
+    ("26c-gemini-live-video.py", EVAL_VISION_CAMERA),
+    ("26e-gemini-live-google-search.py", EVAL_ONLINE_SEARCH),
+    ("26h-gemini-live-vertex-function-calling.py", EVAL_WEATHER),
    # Currently not working.
-    # ("26d-gemini-multimodal-live-text.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
+    # ("26d-gemini-live-text.py", EVAL_SIMPLE_MATH),
 ]

 TESTS_27 = [
-    ("27-simli-layer.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
+    ("27-simli-layer.py", EVAL_SIMPLE_MATH),
 ]

 TESTS_40 = [
-    ("40-aws-nova-sonic.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
+    ("40-aws-nova-sonic.py", EVAL_SIMPLE_MATH),
 ]

 TESTS_43 = [
-    ("43a-heygen-video-service.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
+    ("43a-heygen-video-service.py", EVAL_SIMPLE_MATH),
 ]

 TESTS_44 = [
-    ("44-voicemail-detection.py", PROMPT_VOICEMAIL, EVAL_VOICEMAIL, USER_SPEAKS_FIRST),
-    ("44-voicemail-detection.py", PROMPT_CONVERSATION, EVAL_CONVERSATION, USER_SPEAKS_FIRST),
+    ("44-voicemail-detection.py", EVAL_VOICEMAIL),
+    ("44-voicemail-detection.py", EVAL_CONVERSATION),
 ]

 TESTS = [
@@ -234,9 +239,9 @@ async def main(args: argparse.Namespace):

    # Parse test config: (test, prompt, eval, user_speaks_first)
    for test_config in TESTS:
-        test, prompt, eval, user_speaks_first = test_config
+        test, eval_config = test_config

-        await runner.run_eval(test, prompt, eval, user_speaks_first)
+        await runner.run_eval(test, eval_config)

    runner.print_results()

--- a/src/pipecat/adapters/schemas/tools_schema.py
+++ b/src/pipecat/adapters/schemas/tools_schema.py
@@ -22,9 +22,12 @@ class AdapterType(Enum):

    Parameters:
        GEMINI: Google Gemini adapter - currently the only service supporting custom tools.
+        SHIM: Backward compatibility shim for creating ToolsSchemas from lists of tools in
+              any format, used by LLMContext.from_openai_context.
    """

    GEMINI = "gemini"  # that is the only service where we are able to add custom tools for now
+    SHIM = "shim"  # for use as backward compatibility shim for creating ToolsSchemas from list of tools in any format


 class ToolsSchema:
--- a/src/pipecat/adapters/services/anthropic_adapter.py
+++ b/src/pipecat/adapters/services/anthropic_adapter.py
@@ -110,7 +110,7 @@ class AnthropicLLMAdapter(BaseLLMAdapter[AnthropicLLMInvocationParams]):
        system = NOT_GIVEN
        messages = []

-        # first, map messages using self._from_universal_context_message(m)
+        # First, map messages using self._from_universal_context_message(m)
        try:
            messages = [self._from_universal_context_message(m) for m in universal_context_messages]
        except Exception as e:
@@ -245,13 +245,25 @@ class AnthropicLLMAdapter(BaseLLMAdapter[AnthropicLLMInvocationParams]):
                    item["text"] = "(empty)"
                # handle image_url -> image conversion
                if item["type"] == "image_url":
-                    item["type"] = "image"
-                    item["source"] = {
-                        "type": "base64",
-                        "media_type": "image/jpeg",
-                        "data": item["image_url"]["url"].split(",")[1],
-                    }
-                    del item["image_url"]
+                    if item["image_url"]["url"].startswith("data:"):
+                        item["type"] = "image"
+                        item["source"] = {
+                            "type": "base64",
+                            "media_type": "image/jpeg",
+                            "data": item["image_url"]["url"].split(",")[1],
+                        }
+                        del item["image_url"]
+                    elif item["image_url"]["url"].startswith("http"):
+                        item["type"] = "image"
+                        item["source"] = {
+                            "type": "url",
+                            "url": item["image_url"]["url"],
+                        }
+                        del item["image_url"]
+                    else:
+                        url = item["image_url"]["url"]
+                        logger.warning(f"Unsupported 'image_url': {url}")
+
            # In the case where there's a single image in the list (like what
            # would result from a UserImageRawFrame), ensure that the image
            # comes before text, as recommended by Anthropic docs
--- a/src/pipecat/adapters/services/aws_nova_sonic_adapter.py
+++ b/src/pipecat/adapters/services/aws_nova_sonic_adapter.py
@@ -6,13 +6,47 @@

 """AWS Nova Sonic LLM adapter for Pipecat."""

+import copy
 import json
-from typing import Any, Dict, List, TypedDict
+from dataclasses import dataclass
+from enum import Enum
+from typing import Any, Dict, List, Optional, TypedDict
+
+from loguru import logger

 from pipecat.adapters.base_llm_adapter import BaseLLMAdapter
 from pipecat.adapters.schemas.function_schema import FunctionSchema
-from pipecat.adapters.schemas.tools_schema import ToolsSchema
-from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.adapters.schemas.tools_schema import AdapterType, ToolsSchema
+from pipecat.processors.aggregators.llm_context import LLMContext, LLMContextMessage
+
+
+class Role(Enum):
+    """Roles supported in AWS Nova Sonic conversations.
+
+    Parameters:
+        SYSTEM: System-level messages (not used in conversation history).
+        USER: Messages sent by the user.
+        ASSISTANT: Messages sent by the assistant.
+        TOOL: Messages sent by tools (not used in conversation history).
+    """
+
+    SYSTEM = "SYSTEM"
+    USER = "USER"
+    ASSISTANT = "ASSISTANT"
+    TOOL = "TOOL"
+
+
+@dataclass
+class AWSNovaSonicConversationHistoryMessage:
+    """A single message in AWS Nova Sonic conversation history.
+
+    Parameters:
+        role: The role of the message sender (USER or ASSISTANT only).
+        text: The text content of the message.
+    """
+
+    role: Role  # only USER and ASSISTANT
+    text: str


 class AWSNovaSonicLLMInvocationParams(TypedDict):
@@ -21,7 +55,9 @@ class AWSNovaSonicLLMInvocationParams(TypedDict):
    This is a placeholder until support for universal LLMContext machinery is added for AWS Nova Sonic.
    """

-    pass
+    system_instruction: Optional[str]
+    messages: List[AWSNovaSonicConversationHistoryMessage]
+    tools: List[Dict[str, Any]]


 class AWSNovaSonicLLMAdapter(BaseLLMAdapter[AWSNovaSonicLLMInvocationParams]):
@@ -34,7 +70,7 @@ class AWSNovaSonicLLMAdapter(BaseLLMAdapter[AWSNovaSonicLLMInvocationParams]):
    @property
    def id_for_llm_specific_messages(self) -> str:
        """Get the identifier used in LLMSpecificMessage instances for AWS Nova Sonic."""
-        raise NotImplementedError("Universal LLMContext is not yet supported for AWS Nova Sonic.")
+        return "aws-nova-sonic"

    def get_llm_invocation_params(self, context: LLMContext) -> AWSNovaSonicLLMInvocationParams:
        """Get AWS Nova Sonic-specific LLM invocation parameters from a universal LLM context.
@@ -47,7 +83,13 @@ class AWSNovaSonicLLMAdapter(BaseLLMAdapter[AWSNovaSonicLLMInvocationParams]):
        Returns:
            Dictionary of parameters for invoking AWS Nova Sonic's LLM API.
        """
-        raise NotImplementedError("Universal LLMContext is not yet supported for AWS Nova Sonic.")
+        messages = self._from_universal_context_messages(self.get_messages(context))
+        return {
+            "system_instruction": messages.system_instruction,
+            "messages": messages.messages,
+            # NOTE: LLMContext's tools are guaranteed to be a ToolsSchema (or NOT_GIVEN)
+            "tools": self.from_standard_tools(context.tools) or [],
+        }

    def get_messages_for_logging(self, context) -> List[Dict[str, Any]]:
        """Get messages from a universal LLM context in a format ready for logging about AWS Nova Sonic.
@@ -62,7 +104,75 @@ class AWSNovaSonicLLMAdapter(BaseLLMAdapter[AWSNovaSonicLLMInvocationParams]):
        Returns:
            List of messages in a format ready for logging about AWS Nova Sonic.
        """
-        raise NotImplementedError("Universal LLMContext is not yet supported for AWS Nova Sonic.")
+        return self._from_universal_context_messages(self.get_messages(context)).messages
+
+    @dataclass
+    class ConvertedMessages:
+        """Container for Google-formatted messages converted from universal context."""
+
+        messages: List[AWSNovaSonicConversationHistoryMessage]
+        system_instruction: Optional[str] = None
+
+    def _from_universal_context_messages(
+        self, universal_context_messages: List[LLMContextMessage]
+    ) -> ConvertedMessages:
+        system_instruction = None
+        messages = []
+
+        # Bail if there are no messages
+        if not universal_context_messages:
+            return self.ConvertedMessages()
+
+        universal_context_messages = copy.deepcopy(universal_context_messages)
+
+        # If we have a "system" message as our first message, let's pull that out into "instruction"
+        if universal_context_messages[0].get("role") == "system":
+            system = universal_context_messages.pop(0)
+            content = system.get("content")
+            if isinstance(content, str):
+                system_instruction = content
+            elif isinstance(content, list):
+                system_instruction = content[0].get("text")
+            if system_instruction:
+                self._system_instruction = system_instruction
+
+        # Process remaining messages to fill out conversation history.
+        # Nova Sonic supports "user" and "assistant" messages in history.
+        for universal_context_message in universal_context_messages:
+            message = self._from_universal_context_message(universal_context_message)
+            if message:
+                messages.append(message)
+
+        return self.ConvertedMessages(messages=messages, system_instruction=system_instruction)
+
+    def _from_universal_context_message(self, message) -> AWSNovaSonicConversationHistoryMessage:
+        """Convert standard message format to Nova Sonic format.
+
+        Args:
+            message: Standard message dictionary to convert.
+
+        Returns:
+            Nova Sonic conversation history message, or None if not convertible.
+        """
+        role = message.get("role")
+        if message.get("role") == "user" or message.get("role") == "assistant":
+            content = message.get("content")
+            if isinstance(message.get("content"), list):
+                content = ""
+                for c in message.get("content"):
+                    if c.get("type") == "text":
+                        content += " " + c.get("text")
+                    else:
+                        logger.error(
+                            f"Unhandled content type in context message: {c.get('type')} - {message}"
+                        )
+            # There won't be content if this is an assistant tool call entry.
+            # We're ignoring those since they can't be loaded into AWS Nova Sonic conversation
+            # history
+            if content:
+                return AWSNovaSonicConversationHistoryMessage(role=Role[role.upper()], text=content)
+        # NOTE: we're ignoring messages with role "tool" since they can't be loaded into AWS Nova
+        # Sonic conversation history

    @staticmethod
    def _to_aws_nova_sonic_function_format(function: FunctionSchema) -> Dict[str, Any]:
@@ -100,4 +210,18 @@ class AWSNovaSonicLLMAdapter(BaseLLMAdapter[AWSNovaSonicLLMInvocationParams]):
            List of dictionaries in AWS Nova Sonic function format.
        """
        functions_schema = tools_schema.standard_tools
-        return [self._to_aws_nova_sonic_function_format(func) for func in functions_schema]
+        standard_tools = [
+            self._to_aws_nova_sonic_function_format(func) for func in functions_schema
+        ]
+
+        # For backward compatibility, AWS Nova Sonic can still be used with
+        # tools in dict format, even though it always uses `LLMContext` under
+        # the hood (via `LLMContext.from_openai_context()`).
+        # To support this behavior, we use "shimmed" custom tools here.
+        # (We maintain this backward compatibility because users aren't
+        # *knowingly* opting into the new `LLMContext`.)
+        shimmed_tools = []
+        if tools_schema.custom_tools:
+            shimmed_tools = tools_schema.custom_tools.get(AdapterType.SHIM, [])
+
+        return standard_tools + shimmed_tools
--- a/src/pipecat/adapters/services/bedrock_adapter.py
+++ b/src/pipecat/adapters/services/bedrock_adapter.py
@@ -107,7 +107,7 @@ class AWSBedrockLLMAdapter(BaseLLMAdapter[AWSBedrockLLMInvocationParams]):
        system = None
        messages = []

-        # first, map messages using self._from_universal_context_message(m)
+        # First, map messages using self._from_universal_context_message(m)
        try:
            messages = [self._from_universal_context_message(m) for m in universal_context_messages]
        except Exception as e:
@@ -256,15 +256,22 @@ class AWSBedrockLLMAdapter(BaseLLMAdapter[AWSBedrockLLMInvocationParams]):
                    new_content.append({"text": text_content})
                # handle image_url -> image conversion
                if item["type"] == "image_url":
-                    new_item = {
-                        "image": {
-                            "format": "jpeg",
-                            "source": {
-                                "bytes": base64.b64decode(item["image_url"]["url"].split(",")[1])
-                            },
+                    if item["image_url"]["url"].startswith("data:"):
+                        new_item = {
+                            "image": {
+                                "format": "jpeg",
+                                "source": {
+                                    "bytes": base64.b64decode(
+                                        item["image_url"]["url"].split(",")[1]
+                                    )
+                                },
+                            }
                        }
-                    }
-                    new_content.append(new_item)
+                        new_content.append(new_item)
+                    else:
+                        url = item["image_url"]["url"]
+                        logger.warning(f"Unsupported 'image_url': {url}")
+
            # In the case where there's a single image in the list (like what
            # would result from a UserImageRawFrame), ensure that the image
            # comes before text
--- a/src/pipecat/adapters/services/gemini_adapter.py
+++ b/src/pipecat/adapters/services/gemini_adapter.py
@@ -8,8 +8,8 @@

 import base64
 import json
-from dataclasses import dataclass
-from typing import Any, Dict, List, Optional, TypedDict
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Tuple, TypedDict

 from loguru import logger
 from openai import NotGiven
@@ -24,13 +24,7 @@ from pipecat.processors.aggregators.llm_context import (
 )

 try:
-    from google.genai.types import (
-        Blob,
-        Content,
-        FunctionCall,
-        FunctionResponse,
-        Part,
-    )
+    from google.genai.types import Blob, Content, FileData, FunctionCall, FunctionResponse, Part
 except ModuleNotFoundError as e:
    logger.error(f"Exception: {e}")
    logger.error("In order to use Google AI, you need to `pip install pipecat-ai[google]`.")
@@ -86,10 +80,48 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
            List of tool definitions formatted for Gemini's function-calling API.
            Includes both converted standard tools and any custom Gemini-specific tools.
        """
+
+        def _strip_additional_properties(schema: Dict[str, Any]) -> Dict[str, Any]:
+            """Recursively remove "additionalProperties" fields from JSON schema, as they're not supported by Gemini.
+
+            Args:
+                schema: The JSON schema dict to process.
+
+            Returns:
+                JSON schema dict with "additionalProperties" stripped out.
+            """
+            if not isinstance(schema, dict):
+                return schema
+
+            result = {}
+
+            for key, value in schema.items():
+                if key == "additionalProperties":
+                    continue
+                elif isinstance(value, dict):
+                    result[key] = _strip_additional_properties(value)
+                elif isinstance(value, list):
+                    result[key] = [
+                        _strip_additional_properties(item) if isinstance(item, dict) else item
+                        for item in value
+                    ]
+                else:
+                    result[key] = value
+
+            return result
+
        functions_schema = tools_schema.standard_tools
-        formatted_standard_tools = [
-            {"function_declarations": [func.to_default_dict() for func in functions_schema]}
-        ]
+        if functions_schema:
+            formatted_functions = []
+            for func in functions_schema:
+                func_dict = func.to_default_dict()
+                func_dict["parameters"]["properties"] = _strip_additional_properties(
+                    func_dict["parameters"]["properties"]
+                )
+                formatted_functions.append(func_dict)
+            formatted_standard_tools = [{"function_declarations": formatted_functions}]
+        else:
+            formatted_standard_tools = []
        custom_gemini_tools = []
        if tools_schema.custom_tools:
            custom_gemini_tools = tools_schema.custom_tools.get(AdapterType.GEMINI, [])
@@ -131,6 +163,28 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
        messages: List[Content]
        system_instruction: Optional[str] = None

+    @dataclass
+    class MessageConversionResult:
+        """Result of converting a single universal context message to Google format.
+
+        Either content (a Google Content object) or a system instruction string
+        is guaranteed to be set.
+
+        Also returns a tool call ID to name mapping for any tool calls
+        discovered in the message.
+        """
+
+        content: Optional[Content] = None
+        system_instruction: Optional[str] = None
+        tool_call_id_to_name_mapping: Dict[str, str] = field(default_factory=dict)
+
+    @dataclass
+    class MessageConversionParams:
+        """Parameters for converting a single universal context message to Google format."""
+
+        already_have_system_instruction: bool
+        tool_call_id_to_name_mapping: Dict[str, str]
+
    def _from_universal_context_messages(
        self, universal_context_messages: List[LLMContextMessage]
    ) -> ConvertedMessages:
@@ -154,24 +208,26 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
        """
        system_instruction = None
        messages = []
+        tool_call_id_to_name_mapping = {}

        # Process each message, preserving Google-formatted messages and converting others
        for message in universal_context_messages:
-            if isinstance(message, LLMSpecificMessage):
-                # Assume that LLMSpecificMessage wraps a message in Google format
-                messages.append(message.message)
-                continue
-
-            # Convert standard format to Google format
-            converted = self._from_standard_message(
-                message, already_have_system_instruction=bool(system_instruction)
+            result = self._from_universal_context_message(
+                message,
+                params=self.MessageConversionParams(
+                    already_have_system_instruction=bool(system_instruction),
+                    tool_call_id_to_name_mapping=tool_call_id_to_name_mapping,
+                ),
            )
-            if isinstance(converted, Content):
-                # Regular (non-system) message
-                messages.append(converted)
-            else:
-                # System instruction
-                system_instruction = converted
+            # Each result is either a Content or a system instruction
+            if result.content:
+                messages.append(result.content)
+            elif result.system_instruction:
+                system_instruction = result.system_instruction
+
+            # Merge tool call ID to name mapping
+            if result.tool_call_id_to_name_mapping:
+                tool_call_id_to_name_mapping.update(result.tool_call_id_to_name_mapping)

        # Check if we only have function-related messages (no regular text)
        has_regular_messages = any(
@@ -191,9 +247,16 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):

        return self.ConvertedMessages(messages=messages, system_instruction=system_instruction)

+    def _from_universal_context_message(
+        self, message: LLMContextMessage, *, params: MessageConversionParams
+    ) -> MessageConversionResult:
+        if isinstance(message, LLMSpecificMessage):
+            return self.MessageConversionResult(content=message.message)
+        return self._from_standard_message(message, params=params)
+
    def _from_standard_message(
-        self, message: LLMStandardMessage, already_have_system_instruction: bool
-    ) -> Content | str:
+        self, message: LLMStandardMessage, *, params: MessageConversionParams
+    ) -> MessageConversionResult:
        """Convert standard universal context message to Google Content object.

        Handles conversion of text, images, and function calls to Google's
@@ -203,10 +266,11 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
        Args:
            message: Message in standard universal context format.
            already_have_system_instruction: Whether we already have a system instruction
+            params: Parameters for conversion.

        Returns:
-            Content object with role and parts, or a plain string for system
-            messages.
+            MessageConversionResult containing either a Content object or a
+            system instruction string.

        Examples:
            Standard text message::
@@ -240,38 +304,49 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
            Converts to Google Content with::

                Content(
-                    role="model",
+                    role="user",
                    parts=[Part(function_call=FunctionCall(name="search", args={"query": "test"}))]
                )
        """
        role = message["role"]
        content = message.get("content", [])
+
        if role == "system":
-            if already_have_system_instruction:
+            if params.already_have_system_instruction:
                role = "user"  # Convert system message to user role if we already have a system instruction
            else:
-                # System instructions are returned as plain text
+                system_instruction: str = None
                if isinstance(content, str):
-                    return content
+                    system_instruction = content
                elif isinstance(content, list):
                    # If content is a list, we assume it's a list of text parts, per the standard
-                    return " ".join(part["text"] for part in content if part.get("type") == "text")
+                    system_instruction = " ".join(
+                        part["text"] for part in content if part.get("type") == "text"
+                    )
+                if system_instruction:
+                    return self.MessageConversionResult(system_instruction=system_instruction)
        elif role == "assistant":
            role = "model"

        parts = []
+        tool_call_id_to_name_mapping = {}
+
        if message.get("tool_calls"):
            for tc in message["tool_calls"]:
+                id = tc["id"]
+                name = tc["function"]["name"]
+                tool_call_id_to_name_mapping[id] = name
                parts.append(
                    Part(
                        function_call=FunctionCall(
-                            name=tc["function"]["name"],
+                            id=id,
+                            name=name,
                            args=json.loads(tc["function"]["arguments"]),
                        )
                    )
                )
        elif role == "tool":
-            role = "model"
+            role = "user"
            try:
                response = json.loads(message["content"])
                if isinstance(response, dict):
@@ -282,10 +357,18 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
                # Response might not be JSON-deserializable.
                # This occurs with a UserImageFrame, for example, where we get a plain "COMPLETED" string.
                response_dict = {"value": message["content"]}
+
+            # Get function name from mapping using tool_call_id, or fallback
+            tool_call_id = message.get("tool_call_id")
+            function_name = "tool_call_result"  # Default fallback
+            if tool_call_id and tool_call_id in params.tool_call_id_to_name_mapping:
+                function_name = params.tool_call_id_to_name_mapping[tool_call_id]
+
            parts.append(
                Part(
                    function_response=FunctionResponse(
-                        name="tool_call_result",  # seems to work to hard-code the same name every time
+                        id=tool_call_id,
+                        name=function_name,
                        response=response_dict,
                    )
                )
@@ -296,7 +379,7 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
            for c in content:
                if c["type"] == "text":
                    parts.append(Part(text=c["text"]))
-                elif c["type"] == "image_url":
+                elif c["type"] == "image_url" and c["image_url"]["url"].startswith("data:"):
                    parts.append(
                        Part(
                            inline_data=Blob(
@@ -305,9 +388,25 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
                            )
                        )
                    )
+                elif c["type"] == "image_url":
+                    url = c["image_url"]["url"]
+                    logger.warning(f"Unsupported 'image_url': {url}")
                elif c["type"] == "input_audio":
                    input_audio = c["input_audio"]
                    audio_bytes = base64.b64decode(input_audio["data"])
                    parts.append(Part(inline_data=Blob(mime_type="audio/wav", data=audio_bytes)))
+                elif c["type"] == "file_data":
+                    file_data = c["file_data"]
+                    parts.append(
+                        Part(
+                            file_data=FileData(
+                                mime_type=file_data.get("mime_type"),
+                                file_uri=file_data.get("file_uri"),
+                            )
+                        )
+                    )

-        return Content(role=role, parts=parts)
+        return self.MessageConversionResult(
+            content=Content(role=role, parts=parts),
+            tool_call_id_to_name_mapping=tool_call_id_to_name_mapping,
+        )
--- a/src/pipecat/adapters/services/open_ai_realtime_adapter.py
+++ b/src/pipecat/adapters/services/open_ai_realtime_adapter.py
@@ -6,12 +6,18 @@

 """OpenAI Realtime LLM adapter for Pipecat."""

-from typing import Any, Dict, List, TypedDict
+import copy
+import json
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional, TypedDict
+
+from loguru import logger

 from pipecat.adapters.base_llm_adapter import BaseLLMAdapter
 from pipecat.adapters.schemas.function_schema import FunctionSchema
-from pipecat.adapters.schemas.tools_schema import ToolsSchema
-from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.adapters.schemas.tools_schema import AdapterType, ToolsSchema
+from pipecat.processors.aggregators.llm_context import LLMContext, LLMContextMessage
+from pipecat.services.openai.realtime import events


 class OpenAIRealtimeLLMInvocationParams(TypedDict):
@@ -20,7 +26,9 @@ class OpenAIRealtimeLLMInvocationParams(TypedDict):
    This is a placeholder until support for universal LLMContext machinery is added for OpenAI Realtime.
    """

-    pass
+    system_instruction: Optional[str]
+    messages: List[events.ConversationItem]
+    tools: List[Dict[str, Any]]


 class OpenAIRealtimeLLMAdapter(BaseLLMAdapter):
@@ -33,7 +41,7 @@ class OpenAIRealtimeLLMAdapter(BaseLLMAdapter):
    @property
    def id_for_llm_specific_messages(self) -> str:
        """Get the identifier used in LLMSpecificMessage instances for OpenAI Realtime."""
-        raise NotImplementedError("Universal LLMContext is not yet supported for OpenAI Realtime.")
+        return "openai-realtime"

    def get_llm_invocation_params(self, context: LLMContext) -> OpenAIRealtimeLLMInvocationParams:
        """Get OpenAI Realtime-specific LLM invocation parameters from a universal LLM context.
@@ -46,7 +54,13 @@ class OpenAIRealtimeLLMAdapter(BaseLLMAdapter):
        Returns:
            Dictionary of parameters for invoking OpenAI Realtime's API.
        """
-        raise NotImplementedError("Universal LLMContext is not yet supported for OpenAI Realtime.")
+        messages = self._from_universal_context_messages(self.get_messages(context))
+        return {
+            "system_instruction": messages.system_instruction,
+            "messages": messages.messages,
+            # NOTE: LLMContext's tools are guaranteed to be a ToolsSchema (or NOT_GIVEN)
+            "tools": self.from_standard_tools(context.tools) or [],
+        }

    def get_messages_for_logging(self, context) -> List[Dict[str, Any]]:
        """Get messages from a universal LLM context in a format ready for logging about OpenAI Realtime.
@@ -61,7 +75,124 @@ class OpenAIRealtimeLLMAdapter(BaseLLMAdapter):
        Returns:
            List of messages in a format ready for logging about OpenAI Realtime.
        """
-        raise NotImplementedError("Universal LLMContext is not yet supported for OpenAI Realtime.")
+        # NOTE: this is the same as in OpenAIAdapter, as that's what it was
+        # prior to a refactor. Worth noting that for OpenAI Realtime
+        # specifically, not everything handled here is necessarily supported
+        # (or supported yet).
+        msgs = []
+        for message in self.get_messages(context):
+            msg = copy.deepcopy(message)
+            if "content" in msg:
+                if isinstance(msg["content"], list):
+                    for item in msg["content"]:
+                        if item["type"] == "image_url":
+                            if item["image_url"]["url"].startswith("data:image/"):
+                                item["image_url"]["url"] = "data:image/..."
+                        if item["type"] == "input_audio":
+                            item["input_audio"]["data"] = "..."
+            if "mime_type" in msg and msg["mime_type"].startswith("image/"):
+                msg["data"] = "..."
+            msgs.append(msg)
+        return msgs
+
+    @dataclass
+    class ConvertedMessages:
+        """Container for OpenAI-formatted messages converted from universal context."""
+
+        messages: List[events.ConversationItem]
+        system_instruction: Optional[str] = None
+
+    def _from_universal_context_messages(
+        self, universal_context_messages: List[LLMContextMessage]
+    ) -> ConvertedMessages:
+        # We can't load a long conversation history into the openai realtime api yet. (The API/model
+        # forgets that it can do audio, if you do a series of `conversation.item.create` calls.) So
+        # our general strategy until this is fixed is just to put everything into a first "user"
+        # message as a single input.
+
+        if not universal_context_messages:
+            return self.ConvertedMessages(messages=[])
+
+        messages = copy.deepcopy(universal_context_messages)
+        system_instruction = None
+
+        # If we have a "system" message as our first message, let's pull that out into session
+        # "instructions"
+        if messages[0].get("role") == "system":
+            system = messages.pop(0)
+            content = system.get("content")
+            if isinstance(content, str):
+                system_instruction = content
+            elif isinstance(content, list):
+                system_instruction = content[0].get("text")
+            if not messages:
+                return self.ConvertedMessages(messages=[], system_instruction=system_instruction)
+
+        # If we have just a single "user" item, we can just send it normally
+        if len(messages) == 1 and messages[0].get("role") == "user":
+            return self.ConvertedMessages(
+                messages=[self._from_universal_context_message(messages[0])],
+                system_instruction=system_instruction,
+            )
+
+        # Otherwise, let's pack everything into a single "user" message with a bit of
+        # explanation for the LLM
+        intro_text = """
+        This is a previously saved conversation. Please treat this conversation history as a
+        starting point for the current conversation."""
+
+        trailing_text = """
+        This is the end of the previously saved conversation. Please continue the conversation
+        from here. If the last message is a user instruction or question, act on that instruction
+        or answer the question. If the last message is an assistant response, simple say that you
+        are ready to continue the conversation."""
+
+        return self.ConvertedMessages(
+            messages=[
+                {
+                    "role": "user",
+                    "type": "message",
+                    "content": [
+                        {
+                            "type": "input_text",
+                            "text": "\n\n".join(
+                                [intro_text, json.dumps(messages, indent=2), trailing_text]
+                            ),
+                        }
+                    ],
+                }
+            ],
+            system_instruction=system_instruction,
+        )
+
+    def _from_universal_context_message(
+        self, message: LLMContextMessage
+    ) -> events.ConversationItem:
+        if message.get("role") == "user":
+            content = message.get("content")
+            if isinstance(message.get("content"), list):
+                content = ""
+                for c in message.get("content"):
+                    if c.get("type") == "text":
+                        content += " " + c.get("text")
+                    else:
+                        logger.error(
+                            f"Unhandled content type in context message: {c.get('type')} - {message}"
+                        )
+            return events.ConversationItem(
+                role="user",
+                type="message",
+                content=[events.ItemContent(type="input_text", text=content)],
+            )
+        if message.get("role") == "assistant" and message.get("tool_calls"):
+            tc = message.get("tool_calls")[0]
+            return events.ConversationItem(
+                type="function_call",
+                call_id=tc["id"],
+                name=tc["function"]["name"],
+                arguments=tc["function"]["arguments"],
+            )
+        logger.error(f"Unhandled message type in _from_universal_context_message: {message}")

    @staticmethod
    def _to_openai_realtime_function_format(function: FunctionSchema) -> Dict[str, Any]:
@@ -94,4 +225,18 @@ class OpenAIRealtimeLLMAdapter(BaseLLMAdapter):
            List of function definitions in OpenAI Realtime format.
        """
        functions_schema = tools_schema.standard_tools
-        return [self._to_openai_realtime_function_format(func) for func in functions_schema]
+        standard_tools = [
+            self._to_openai_realtime_function_format(func) for func in functions_schema
+        ]
+
+        # For backward compatibility, OpenAI Realtime can still be used with
+        # tools in dict format, even though it always uses `LLMContext` under
+        # the hood (via `LLMContext.from_openai_context()`).
+        # To support this behavior, we use "shimmed" custom tools here.
+        # (We maintain this backward compatibility because users aren't
+        # *knowingly* opting into the new `LLMContext`.)
+        shimmed_tools = []
+        if tools_schema.custom_tools:
+            shimmed_tools = tools_schema.custom_tools.get(AdapterType.SHIM, [])
+
+        return standard_tools + shimmed_tools
--- a/src/pipecat/audio/filters/krisp_viva_filter.py
+++ b/src/pipecat/audio/filters/krisp_viva_filter.py
@@ -0,0 +1,193 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+"""Krisp noise reduction audio filter for Pipecat.
+
+This module provides an audio filter implementation using Krisp VIVA SDK.
+"""
+
+import os
+
+import numpy as np
+from loguru import logger
+
+from pipecat.audio.filters.base_audio_filter import BaseAudioFilter
+from pipecat.frames.frames import FilterControlFrame, FilterEnableFrame
+
+try:
+    import krisp_audio
+except ModuleNotFoundError as e:
+    logger.error(f"Exception: {e}")
+    logger.error("In order to use the Krisp filter, you need to install krisp_audio.")
+    raise Exception(f"Missing module: {e}")
+
+
+def _log_callback(log_message, log_level):
+    logger.info(f"[{log_level}] {log_message}")
+
+
+class KrispVivaFilter(BaseAudioFilter):
+    """Audio filter using the Krisp VIVA SDK.
+
+    Provides real-time noise reduction for audio streams using Krisp's
+    proprietary noise suppression algorithms. This filter requires a
+    valid Krisp model file to operate.
+
+    Supported sample rates:
+        - 8000 Hz
+        - 16000 Hz
+        - 24000 Hz
+        - 32000 Hz
+        - 44100 Hz
+        - 48000 Hz
+    """
+
+    # Initialize Krisp Audio SDK globally
+    krisp_audio.globalInit("", _log_callback, krisp_audio.LogLevel.Off)
+    SDK_VERSION = krisp_audio.getVersion()
+    logger.debug(
+        f"Krisp Audio Python SDK Version: {SDK_VERSION.major}."
+        f"{SDK_VERSION.minor}.{SDK_VERSION.patch}"
+    )
+
+    SAMPLE_RATES = {
+        8000: krisp_audio.SamplingRate.Sr8000Hz,
+        16000: krisp_audio.SamplingRate.Sr16000Hz,
+        24000: krisp_audio.SamplingRate.Sr24000Hz,
+        32000: krisp_audio.SamplingRate.Sr32000Hz,
+        44100: krisp_audio.SamplingRate.Sr44100Hz,
+        48000: krisp_audio.SamplingRate.Sr48000Hz,
+    }
+
+    FRAME_SIZE_MS = 10  # Krisp requires audio frames of 10ms duration for processing.
+
+    def __init__(self, model_path: str = None, noise_suppression_level: int = 100) -> None:
+        """Initialize the Krisp noise reduction filter.
+
+        Args:
+            model_path: Path to the Krisp model file (.kef extension).
+                If None, uses KRISP_VIVA_MODEL_PATH environment variable.
+            noise_suppression_level: Noise suppression level.
+
+        Raises:
+            ValueError: If model_path is not provided and KRISP_VIVA_MODEL_PATH is not set.
+            Exception: If model file doesn't have .kef extension.
+            FileNotFoundError: If model file doesn't exist.
+        """
+        super().__init__()
+
+        # Set model path, checking environment if not specified
+        self._model_path = model_path or os.getenv("KRISP_VIVA_MODEL_PATH")
+        if not self._model_path:
+            logger.error("Model path is not provided and KRISP_VIVA_MODEL_PATH is not set.")
+            raise ValueError("Model path for KrispAudioProcessor must be provided.")
+
+        if not self._model_path.endswith(".kef"):
+            raise Exception("Model is expected with .kef extension")
+
+        if not os.path.isfile(self._model_path):
+            raise FileNotFoundError(f"Model file not found: {self._model_path}")
+
+        self._filtering = True
+        self._session = None
+        self._samples_per_frame = None
+        self._noise_suppression_level = noise_suppression_level
+
+        # Audio buffer to accumulate samples for complete frames
+        self._audio_buffer = bytearray()
+
+    def _int_to_sample_rate(self, sample_rate):
+        """Convert integer sample rate to krisp_audio SamplingRate enum.
+
+        Args:
+            sample_rate: Sample rate as integer
+
+        Returns:
+            krisp_audio.SamplingRate enum value
+
+        Raises:
+            ValueError: If sample rate is not supported
+        """
+        if sample_rate not in self.SAMPLE_RATES:
+            raise ValueError("Unsupported sample rate")
+        return self.SAMPLE_RATES[sample_rate]
+
+    async def start(self, sample_rate: int):
+        """Initialize the Krisp processor with the transport's sample rate.
+
+        Args:
+            sample_rate: The sample rate of the input transport in Hz.
+        """
+        model_info = krisp_audio.ModelInfo()
+        model_info.path = self._model_path
+
+        nc_cfg = krisp_audio.NcSessionConfig()
+        nc_cfg.inputSampleRate = self._int_to_sample_rate(sample_rate)
+        nc_cfg.inputFrameDuration = krisp_audio.FrameDuration.Fd10ms
+        nc_cfg.outputSampleRate = nc_cfg.inputSampleRate
+        nc_cfg.modelInfo = model_info
+
+        self._samples_per_frame = int((sample_rate * self.FRAME_SIZE_MS) / 1000)
+        self._session = krisp_audio.NcInt16.create(nc_cfg)
+
+    async def stop(self):
+        """Clean up the Krisp processor when stopping."""
+        self._session = None
+
+    async def process_frame(self, frame: FilterControlFrame):
+        """Process control frames to enable/disable filtering.
+
+        Args:
+            frame: The control frame containing filter commands.
+        """
+        if isinstance(frame, FilterEnableFrame):
+            self._filtering = frame.enable
+
+    async def filter(self, audio: bytes) -> bytes:
+        """Apply Krisp noise reduction to audio data.
+
+        Args:
+            audio: Raw audio data as bytes to be filtered.
+
+        Returns:
+            Noise-reduced audio data as bytes.
+        """
+        if not self._filtering:
+            return audio
+
+        # Add incoming audio to our buffer
+        self._audio_buffer.extend(audio)
+
+        # Calculate how many complete frames we can process
+        total_samples = len(self._audio_buffer) // 2  # 2 bytes per int16 sample
+        num_complete_frames = total_samples // self._samples_per_frame
+
+        if num_complete_frames == 0:
+            # Not enough samples for a complete frame yet, return empty
+            return b""
+
+        # Calculate how many bytes we need for complete frames
+        complete_samples_count = num_complete_frames * self._samples_per_frame
+        bytes_to_process = complete_samples_count * 2  # 2 bytes per sample
+
+        # Extract the bytes we can process
+        audio_to_process = bytes(self._audio_buffer[:bytes_to_process])
+
+        # Remove processed bytes from buffer, keep the remainder
+        self._audio_buffer = self._audio_buffer[bytes_to_process:]
+
+        # Process the complete frames
+        samples = np.frombuffer(audio_to_process, dtype=np.int16)
+        frames = samples.reshape(-1, self._samples_per_frame)
+        processed_samples = np.empty_like(samples)
+
+        for i, frame in enumerate(frames):
+            cleaned_frame = self._session.process(frame, self._noise_suppression_level)
+            processed_samples[i * self._samples_per_frame : (i + 1) * self._samples_per_frame] = (
+                cleaned_frame
+            )
+
+        return processed_samples.tobytes()
--- a/src/pipecat/audio/turn/base_turn_analyzer.py
+++ b/src/pipecat/audio/turn/base_turn_analyzer.py
@@ -14,6 +14,8 @@ from abc import ABC, abstractmethod
 from enum import Enum
 from typing import Optional, Tuple

+from pydantic import BaseModel
+
 from pipecat.metrics.metrics import MetricsData


@@ -29,6 +31,12 @@ class EndOfTurnState(Enum):
    INCOMPLETE = 2


+class BaseTurnParams(BaseModel):
+    """Base class for turn analyzer parameters."""
+
+    pass
+
+
 class BaseTurnAnalyzer(ABC):
    """Abstract base class for analyzing user end of turn.

@@ -78,7 +86,7 @@ class BaseTurnAnalyzer(ABC):

    @property
    @abstractmethod
-    def params(self):
+    def params(self) -> BaseTurnParams:
        """Get the current turn analyzer parameters.

        Returns:
--- a/src/pipecat/audio/turn/smart_turn/base_smart_turn.py
+++ b/src/pipecat/audio/turn/smart_turn/base_smart_turn.py
@@ -11,15 +11,17 @@ machine learning models to determine when a user has finished speaking, going
 beyond simple silence-based detection.
 """

+import asyncio
 import time
 from abc import abstractmethod
+from concurrent.futures import ThreadPoolExecutor
 from typing import Any, Dict, Optional, Tuple

 import numpy as np
 from loguru import logger
 from pydantic import BaseModel

-from pipecat.audio.turn.base_turn_analyzer import BaseTurnAnalyzer, EndOfTurnState
+from pipecat.audio.turn.base_turn_analyzer import BaseTurnAnalyzer, BaseTurnParams, EndOfTurnState
 from pipecat.metrics.metrics import MetricsData, SmartTurnMetricsData

 # Default timing parameters
@@ -29,7 +31,7 @@ MAX_DURATION_SECONDS = 8  # Max allowed segment duration
 USE_ONLY_LAST_VAD_SEGMENT = True


-class SmartTurnParams(BaseModel):
+class SmartTurnParams(BaseTurnParams):
    """Configuration parameters for smart turn analysis.

    Parameters:
@@ -77,6 +79,9 @@ class BaseSmartTurn(BaseTurnAnalyzer):
        self._speech_triggered = False
        self._silence_ms = 0
        self._speech_start_time = 0
+        # Thread executor that will run the model. We only need one thread per
+        # analyzer because one analyzer just handles one audio stream.
+        self._executor = ThreadPoolExecutor(max_workers=1)

    @property
    def speech_triggered(self) -> bool:
@@ -151,7 +156,10 @@ class BaseSmartTurn(BaseTurnAnalyzer):
            Tuple containing the end-of-turn state and optional metrics data
            from the ML model analysis.
        """
-        state, result = await self._process_speech_segment(self._audio_buffer)
+        loop = asyncio.get_running_loop()
+        state, result = await loop.run_in_executor(
+            self._executor, self._process_speech_segment, self._audio_buffer
+        )
        if state == EndOfTurnState.COMPLETE or USE_ONLY_LAST_VAD_SEGMENT:
            self._clear(state)
        logger.debug(f"End of Turn result: {state}")
@@ -169,9 +177,7 @@ class BaseSmartTurn(BaseTurnAnalyzer):
        self._speech_start_time = 0
        self._silence_ms = 0

-    async def _process_speech_segment(
-        self, audio_buffer
-    ) -> Tuple[EndOfTurnState, Optional[MetricsData]]:
+    def _process_speech_segment(self, audio_buffer) -> Tuple[EndOfTurnState, Optional[MetricsData]]:
        """Process accumulated audio segment using ML model."""
        state = EndOfTurnState.INCOMPLETE

@@ -203,7 +209,7 @@ class BaseSmartTurn(BaseTurnAnalyzer):
        if len(segment_audio) > 0:
            start_time = time.perf_counter()
            try:
-                result = await self._predict_endpoint(segment_audio)
+                result = self._predict_endpoint(segment_audio)
                state = (
                    EndOfTurnState.COMPLETE
                    if result["prediction"] == 1
@@ -249,6 +255,6 @@ class BaseSmartTurn(BaseTurnAnalyzer):
        return state, result_data

    @abstractmethod
-    async def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]:
+    def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]:
        """Predict end-of-turn using ML model from audio data."""
        pass
--- a/src/pipecat/audio/turn/smart_turn/http_smart_turn.py
+++ b/src/pipecat/audio/turn/smart_turn/http_smart_turn.py
@@ -104,11 +104,15 @@ class HttpSmartTurnAnalyzer(BaseSmartTurn):
            logger.error(f"Failed to send raw request to Daily Smart Turn: {e}")
            raise Exception("Failed to send raw request to Daily Smart Turn.")

-    async def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]:
+    def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]:
        """Predict end-of-turn using remote HTTP ML service."""
        try:
            serialized_array = self._serialize_array(audio_array)
-            return await self._send_raw_request(serialized_array)
+            loop = asyncio.get_running_loop()
+            future = asyncio.run_coroutine_threadsafe(
+                self._send_raw_request(serialized_array), loop
+            )
+            return future.result()
        except Exception as e:
            logger.error(f"Smart turn prediction failed: {str(e)}")
            # Return an incomplete prediction when a failure occurs
--- a/src/pipecat/audio/turn/smart_turn/local_smart_turn.py
+++ b/src/pipecat/audio/turn/smart_turn/local_smart_turn.py
@@ -64,7 +64,7 @@ class LocalSmartTurnAnalyzer(BaseSmartTurn):
        self._turn_model.eval()
        logger.debug("Loaded Local Smart Turn")

-    async def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]:
+    def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]:
        """Predict end-of-turn using local PyTorch model."""
        inputs = self._turn_processor(
            audio_array,
--- a/src/pipecat/audio/turn/smart_turn/local_smart_turn_v2.py
+++ b/src/pipecat/audio/turn/smart_turn/local_smart_turn_v2.py
@@ -73,7 +73,7 @@ class LocalSmartTurnAnalyzerV2(BaseSmartTurn):
        self._turn_model.eval()
        logger.debug("Loaded Local Smart Turn v2")

-    async def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]:
+    def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]:
        """Predict end-of-turn using local PyTorch model."""
        inputs = self._turn_processor(
            audio_array,
--- a/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py
+++ b/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py
@@ -35,12 +35,15 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn):
    enabling offline operation without network dependencies.
    """

-    def __init__(self, *, smart_turn_model_path: Optional[str] = None, **kwargs):
+    def __init__(
+        self, *, smart_turn_model_path: Optional[str] = None, cpu_count: int = 1, **kwargs
+    ):
        """Initialize the local ONNX smart-turn-v3 analyzer.

        Args:
            smart_turn_model_path: Path to the ONNX model file. If this is not
                set, the bundled smart-turn-v3.0 model will be used.
+            cpu_count: The number of CPUs to use for inference. Defaults to 1.
            **kwargs: Additional arguments passed to BaseSmartTurn.
        """
        super().__init__(**kwargs)
@@ -70,6 +73,7 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn):
        so = ort.SessionOptions()
        so.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
        so.inter_op_num_threads = 1
+        so.intra_op_num_threads = cpu_count
        so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

        self._feature_extractor = WhisperFeatureExtractor(chunk_length=8)
@@ -77,7 +81,7 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn):

        logger.debug("Loaded Local Smart Turn v3")

-    async def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]:
+    def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]:
        """Predict end-of-turn using local ONNX model."""

        def truncate_audio_to_last_n_seconds(audio_array, n_seconds=8, sample_rate=16000):
--- a/src/pipecat/audio/vad/vad_analyzer.py
+++ b/src/pipecat/audio/vad/vad_analyzer.py
@@ -11,7 +11,9 @@ data structures for voice activity detection in audio streams. Includes state
 management, parameter configuration, and audio analysis framework.
 """

+import asyncio
 from abc import ABC, abstractmethod
+from concurrent.futures import ThreadPoolExecutor
 from enum import Enum
 from typing import Optional

@@ -84,6 +86,10 @@ class VADAnalyzer(ABC):
        self._smoothing_factor = 0.2
        self._prev_volume = 0

+        # Thread executor that will run the model. We only need one thread per
+        # analyzer because one analyzer just handles one audio stream.
+        self._executor = ThreadPoolExecutor(max_workers=1)
+
    @property
    def sample_rate(self) -> int:
        """Get the current sample rate.
@@ -165,7 +171,7 @@ class VADAnalyzer(ABC):
        volume = calculate_audio_volume(audio, self.sample_rate)
        return exp_smoothing(volume, self._prev_volume, self._smoothing_factor)

-    def analyze_audio(self, buffer) -> VADState:
+    async def analyze_audio(self, buffer: bytes) -> VADState:
        """Analyze audio buffer and return current VAD state.

        Processes incoming audio data, maintains internal state, and determines
@@ -177,6 +183,12 @@ class VADAnalyzer(ABC):
        Returns:
            Current VAD state after processing the buffer.
        """
+        loop = asyncio.get_running_loop()
+        state = await loop.run_in_executor(self._executor, self._run_analyzer, buffer)
+        return state
+
+    def _run_analyzer(self, buffer: bytes) -> VADState:
+        """Analyze audio buffer and return current VAD state."""
        self._vad_buffer += buffer

        num_required_bytes = self._vad_frames_num_bytes
--- a/src/pipecat/frames/frames.py
+++ b/src/pipecat/frames/frames.py
@@ -672,7 +672,7 @@ class TTSSpeakFrame(DataFrame):


@dataclass
-class TransportMessageFrame(DataFrame):
+class OutputTransportMessageFrame(DataFrame):
    """Frame containing transport-specific message data.

    Parameters:
@@ -685,6 +685,32 @@ class TransportMessageFrame(DataFrame):
        return f"{self.name}(message: {self.message})"


+@dataclass
+class TransportMessageFrame(OutputTransportMessageFrame):
+    """Frame containing transport-specific message data.
+
+    .. deprecated:: 0.0.87
+        This frame is deprecated and will be removed in a future version.
+        Instead, use `OutputTransportMessageFrame`.
+
+    Parameters:
+        message: The transport message payload.
+    """
+
+    def __post_init__(self):
+        super().__post_init__()
+        import warnings
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "TransportMessageFrame is deprecated and will be removed in a future version. "
+                "Instead, use OutputTransportMessageFrame.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+
+
@dataclass
 class DTMFFrame:
    """Base class for DTMF (Dual-Tone Multi-Frequency) keypad frames.
@@ -747,9 +773,15 @@ class CancelFrame(SystemFrame):

    Indicates that a pipeline needs to stop right away without
    processing remaining queued frames.
+
+    Parameters:
+        reason: Optional reason for pushing a cancel frame.
    """

-    pass
+    reason: Optional[str] = None
+
+    def __str__(self):
+        return f"{self.name}(reason: {self.reason})"


@dataclass
@@ -1092,8 +1124,8 @@ class STTMuteFrame(SystemFrame):


@dataclass
-class TransportMessageUrgentFrame(SystemFrame):
-    """Frame for urgent transport messages that need immediate processing.
+class InputTransportMessageFrame(SystemFrame):
+    """Frame for transport messages received from external sources.

    Parameters:
        message: The urgent transport message payload.
@@ -1106,46 +1138,112 @@ class TransportMessageUrgentFrame(SystemFrame):


@dataclass
-class InputTransportMessageUrgentFrame(TransportMessageUrgentFrame):
+class InputTransportMessageUrgentFrame(InputTransportMessageFrame):
    """Frame for transport messages received from external sources.

-    This frame wraps incoming transport messages to distinguish them from outgoing
-    urgent transport messages (TransportMessageUrgentFrame), preventing infinite
-    message loops in the transport layer. It inherits the message payload from
-    TransportMessageFrame while marking the message as having been received
-    rather than generated locally.
+    .. deprecated:: 0.0.87
+        This frame is deprecated and will be removed in a future version.
+        Instead, use `InputTransportMessageFrame`.

-    Used by transport implementations to properly handle bidirectional message
-    flow without creating feedback loops.
+    Parameters:
+        message: The urgent transport message payload.
    """

-    pass
+    def __post_init__(self):
+        super().__post_init__()
+        import warnings
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "InputTransportMessageUrgentFrame is deprecated and will be removed in a future version. "
+                "Instead, use InputTransportMessageFrame.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+
+
+@dataclass
+class OutputTransportMessageUrgentFrame(SystemFrame):
+    """Frame for urgent transport messages that need to be sent immediately.
+
+    Parameters:
+        message: The urgent transport message payload.
+    """
+
+    message: Any
+
+    def __str__(self):
+        return f"{self.name}(message: {self.message})"
+
+
+@dataclass
+class TransportMessageUrgentFrame(OutputTransportMessageUrgentFrame):
+    """Frame for urgent transport messages that need to be sent immediately.
+
+    .. deprecated:: 0.0.87
+        This frame is deprecated and will be removed in a future version.
+        Instead, use `OutputTransportMessageUrgentFrame`.
+
+    Parameters:
+        message: The urgent transport message payload.
+    """
+
+    def __post_init__(self):
+        super().__post_init__()
+        import warnings
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "TransportMessageUrgentFrame is deprecated and will be removed in a future version. "
+                "Instead, use OutputTransportMessageFrame.",
+                DeprecationWarning,
+                stacklevel=2,
+            )


@dataclass
 class UserImageRequestFrame(SystemFrame):
    """Frame requesting an image from a specific user.

-    A frame to request an image from the given user. The frame might be
-    generated by a function call in which case the corresponding fields will be
-    properly set.
+    A frame to request an image from the given user. The request might come with
+    a text that can be later used to describe the requested image.

    Parameters:
        user_id: Identifier of the user to request image from.
-        context: Optional context for the image request.
-        function_name: Name of function that generated this request (if any).
-        tool_call_id: Tool call ID if generated by function call.
+        text: An optional text associated to the image request.
+        append_to_context: Whether the requested image should be appended to the LLM context.
        video_source: Specific video source to capture from.
+        context: [DEPRECATED] Optional context for the image request.
+        function_name: [DEPRECATED] Name of function that generated this request (if any).
+        tool_call_id: [DEPRECATED] Tool call ID if generated by function call.
    """

    user_id: str
+    text: Optional[str] = None
+    append_to_context: Optional[bool] = None
+    video_source: Optional[str] = None
    context: Optional[Any] = None
    function_name: Optional[str] = None
    tool_call_id: Optional[str] = None
-    video_source: Optional[str] = None
+
+    def __post_init__(self):
+        super().__post_init__()
+
+        if self.context or self.function_name or self.tool_call_id:
+            import warnings
+
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "`UserImageRequestFrame` fields `context`, `function_name` and `tool_call_id` are deprecated.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )

    def __str__(self):
-        return f"{self.name}(user: {self.user_id}, video_source: {self.video_source}, function: {self.function_name}, request: {self.tool_call_id})"
+        return f"{self.name}(user: {self.user_id}, text: {self.text}, append_to_context: {self.append_to_context}, {self.video_source})"


@dataclass
@@ -1219,15 +1317,33 @@ class UserImageRawFrame(InputImageRawFrame):

    Parameters:
        user_id: Identifier of the user who provided this image.
-        request: The original image request frame if this is a response.
+        text: An optional text associated to this image.
+        append_to_context: Whether the requested image should be appended to the LLM context.
+        request: [DEPRECATED] The original image request frame if this is a response.
    """

    user_id: str = ""
+    text: Optional[str] = None
+    append_to_context: Optional[bool] = None
    request: Optional[UserImageRequestFrame] = None

+    def __post_init__(self):
+        super().__post_init__()
+
+        if self.request:
+            import warnings
+
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "`UserImageRawFrame` field `request` is deprecated.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
+
    def __str__(self):
        pts = format_pts(self.pts)
-        return f"{self.name}(pts: {pts}, user: {self.user_id}, source: {self.transport_source}, size: {self.size}, format: {self.format}, request: {self.request})"
+        return f"{self.name}(pts: {pts}, user: {self.user_id}, source: {self.transport_source}, size: {self.size}, format: {self.format}, text: {self.text}, append_to_context: {self.append_to_context})"


@dataclass
@@ -1292,9 +1408,15 @@ class EndTaskFrame(TaskFrame):
    This is used to notify the pipeline task that the pipeline should be
    closed nicely (flushing all the queued frames) by pushing an EndFrame
    downstream. This frame should be pushed upstream.
+
+    Parameters:
+        reason: Optional reason for pushing an end frame.
    """

-    pass
+    reason: Optional[str] = None
+
+    def __str__(self):
+        return f"{self.name}(reason: {self.reason})"


@dataclass
@@ -1304,9 +1426,15 @@ class CancelTaskFrame(TaskFrame):
    This is used to notify the pipeline task that the pipeline should be
    stopped immediately by pushing a CancelFrame downstream. This frame
    should be pushed upstream.
+
+    Parameters:
+        reason: Optional reason for pushing a cancel frame.
    """

-    pass
+    reason: Optional[str] = None
+
+    def __str__(self):
+        return f"{self.name}(reason: {self.reason})"


@dataclass
@@ -1377,9 +1505,15 @@ class EndFrame(ControlFrame):
    sending frames to its output channel(s) and close all its threads. Note,
    that this is a control frame, which means it will be received in the order it
    was sent.
+
+    Parameters:
+        reason: Optional reason for pushing an end frame.
    """

-    pass
+    reason: Optional[str] = None
+
+    def __str__(self):
+        return f"{self.name}(reason: {self.reason})"


@dataclass
--- a/src/pipecat/observers/loggers/metrics_log_observer.py
+++ b/src/pipecat/observers/loggers/metrics_log_observer.py
@@ -0,0 +1,218 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+"""Metrics logging observer for Pipecat.
+
+This module provides an observer that logs metrics frames to the console,
+allowing developers to monitor performance metrics, token usage, and other
+statistics in real-time.
+"""
+
+from typing import Optional, Set, Type
+
+from loguru import logger
+
+from pipecat.frames.frames import MetricsFrame
+from pipecat.metrics.metrics import (
+    LLMTokenUsage,
+    LLMUsageMetricsData,
+    MetricsData,
+    ProcessingMetricsData,
+    SmartTurnMetricsData,
+    TTFBMetricsData,
+    TTSUsageMetricsData,
+)
+from pipecat.observers.base_observer import BaseObserver, FramePushed
+
+
+class MetricsLogObserver(BaseObserver):
+    """Observer to log metrics activity to the console.
+
+    Monitors and logs all MetricsFrame instances, including:
+
+    - TTFBMetricsData (Time To First Byte)
+    - ProcessingMetricsData (General processing time)
+    - LLMUsageMetricsData (Token usage statistics)
+    - TTSUsageMetricsData (Text-to-Speech character counts)
+    - SmartTurnMetricsData (Turn prediction metrics)
+
+    This allows developers to track performance metrics, token usage,
+    and other statistics throughout the pipeline.
+
+    Examples:
+        Log all metrics types::
+
+            observers = [MetricsLogObserver()]
+
+        Log only LLM and TTS metrics::
+
+            from pipecat.metrics.metrics import LLMUsageMetricsData, TTSUsageMetricsData
+            observers = [
+                MetricsLogObserver(
+                    include_metrics={LLMUsageMetricsData, TTSUsageMetricsData}
+                )
+            ]
+    """
+
+    def __init__(
+        self,
+        include_metrics: Optional[Set[Type[MetricsData]]] = None,
+        **kwargs,
+    ):
+        """Initialize the metrics log observer.
+
+        Args:
+            include_metrics: Set of metrics types to include. If specified, only these
+                metrics types will be logged. If None, all metrics are logged.
+            **kwargs: Additional arguments passed to parent class.
+        """
+        super().__init__(**kwargs)
+        self._include_metrics = include_metrics
+        self._frames_seen = set()
+
+    async def on_push_frame(self, data: FramePushed):
+        """Handle frame push events and log metrics frames.
+
+        Logs MetricsFrame instances with detailed information about the
+        metrics data, formatted appropriately for each metrics type.
+
+        Args:
+            data: Frame push event data containing source, frame, and timestamp.
+        """
+        frame = data.frame
+        timestamp = data.timestamp
+
+        if not isinstance(frame, MetricsFrame):
+            return
+
+        # Skip frames we've already seen to avoid duplicate logging
+        if frame.id in self._frames_seen:
+            return
+
+        self._frames_seen.add(frame.id)
+
+        time_sec = timestamp / 1_000_000_000
+
+        # Process each metrics data item in the frame
+        for metrics_data in frame.data:
+            # Check if this metrics type should be logged
+            if not self._should_log_metrics(metrics_data):
+                continue
+
+            self._log_metrics_data(metrics_data, time_sec)
+
+    def _should_log_metrics(self, metrics_data: MetricsData) -> bool:
+        """Determine if a metrics data item should be logged based on filters.
+
+        Args:
+            metrics_data: The metrics data to check.
+
+        Returns:
+            True if the metrics should be logged, False otherwise.
+        """
+        # If include_metrics is specified, only log those types
+        if self._include_metrics is not None:
+            return type(metrics_data) in self._include_metrics
+
+        # Otherwise, log all metrics
+        return True
+
+    def _log_metrics_data(self, metrics_data: MetricsData, time_sec: float):
+        """Log a single metrics data item.
+
+        Args:
+            metrics_data: The metrics data to log.
+            time_sec: Timestamp in seconds.
+        """
+        processor_info = f"[{metrics_data.processor}]"
+        model_info = f" ({metrics_data.model})" if metrics_data.model else ""
+
+        if isinstance(metrics_data, TTFBMetricsData):
+            logger.debug(
+                f"📊 {processor_info} TTFB{model_info}: {metrics_data.value}s at {time_sec:.3f}s"
+            )
+        elif isinstance(metrics_data, ProcessingMetricsData):
+            logger.debug(
+                f"📊 {processor_info} PROCESSING TIME{model_info}: {metrics_data.value}s at {time_sec:.3f}s"
+            )
+        elif isinstance(metrics_data, LLMUsageMetricsData):
+            self._log_llm_usage(metrics_data, processor_info, model_info, time_sec)
+        elif isinstance(metrics_data, TTSUsageMetricsData):
+            logger.debug(
+                f"📊 {processor_info} TTS USAGE{model_info}: {metrics_data.value} characters at {time_sec:.3f}s"
+            )
+        elif isinstance(metrics_data, SmartTurnMetricsData):
+            self._log_smart_turn(metrics_data, processor_info, model_info, time_sec)
+        else:
+            # Generic fallback for unknown metrics types
+            logger.debug(
+                f"📊 {processor_info} METRICS{model_info}: {metrics_data} at {time_sec:.3f}s"
+            )
+
+    def _log_llm_usage(
+        self,
+        metrics_data: LLMUsageMetricsData,
+        processor_info: str,
+        model_info: str,
+        time_sec: float,
+    ):
+        """Log LLM token usage metrics.
+
+        Args:
+            metrics_data: The LLM usage metrics data.
+            processor_info: Formatted processor name string.
+            model_info: Formatted model name string.
+            time_sec: Timestamp in seconds.
+        """
+        usage: LLMTokenUsage = metrics_data.value
+
+        # Build usage details
+        details = [
+            f"prompt: {usage.prompt_tokens}",
+            f"completion: {usage.completion_tokens}",
+            f"total: {usage.total_tokens}",
+        ]
+
+        if usage.cache_read_input_tokens is not None:
+            details.append(f"cache_read: {usage.cache_read_input_tokens}")
+
+        if usage.cache_creation_input_tokens is not None:
+            details.append(f"cache_creation: {usage.cache_creation_input_tokens}")
+
+        if usage.reasoning_tokens is not None:
+            details.append(f"reasoning: {usage.reasoning_tokens}")
+
+        usage_str = ", ".join(details)
+
+        logger.debug(
+            f"📊 {processor_info} LLM TOKEN USAGE{model_info}: {usage_str} at {time_sec:.2f}s"
+        )
+
+    def _log_smart_turn(
+        self,
+        metrics_data: SmartTurnMetricsData,
+        processor_info: str,
+        model_info: str,
+        time_sec: float,
+    ):
+        """Log smart turn prediction metrics.
+
+        Args:
+            metrics_data: The smart turn metrics data.
+            processor_info: Formatted processor name string.
+            model_info: Formatted model name string.
+            time_sec: Timestamp in seconds.
+        """
+        complete_str = "COMPLETE" if metrics_data.is_complete else "INCOMPLETE"
+
+        logger.debug(
+            f"📊 {processor_info} SMART TURN{model_info}: {complete_str} "
+            f"(probability: {metrics_data.probability:.2%}, "
+            f"inference: {metrics_data.inference_time_ms:.1f}ms, "
+            f"server: {metrics_data.server_total_time_ms:.1f}ms, "
+            f"e2e: {metrics_data.e2e_processing_time_ms:.1f}ms) "
+            f"at {time_sec:.2f}s"
+        )
--- a/src/pipecat/pipeline/llm_switcher.py
+++ b/src/pipecat/pipeline/llm_switcher.py
@@ -8,26 +8,48 @@

 from typing import Any, List, Optional, Type

+from pipecat.adapters.schemas.direct_function import DirectFunction
 from pipecat.pipeline.service_switcher import ServiceSwitcher, StrategyType
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.services.llm_service import LLMService


 class LLMSwitcher(ServiceSwitcher[StrategyType]):
-    """A pipeline that switches between different LLMs at runtime."""
+    """A pipeline that switches between different LLMs at runtime.
+
+    Example::
+
+        llm_switcher = LLMSwitcher(
+            llms=[openai_llm, anthropic_llm],
+            strategy_type=ServiceSwitcherStrategyManual
+        )
+    """

    def __init__(self, llms: List[LLMService], strategy_type: Type[StrategyType]):
-        """Initialize the service switcher with a list of LLMs and a switching strategy."""
+        """Initialize the service switcher with a list of LLMs and a switching strategy.
+
+        Args:
+            llms: List of LLM services to switch between.
+            strategy_type: The strategy class to use for switching between LLMs.
+        """
        super().__init__(llms, strategy_type)

    @property
    def llms(self) -> List[LLMService]:
-        """Get the list of LLMs managed by this switcher."""
+        """Get the list of LLMs managed by this switcher.
+
+        Returns:
+            List of LLM services managed by this switcher.
+        """
        return self.services

    @property
    def active_llm(self) -> Optional[LLMService]:
-        """Get the currently active LLM, if any."""
+        """Get the currently active LLM.
+
+        Returns:
+            The currently active LLM service, or None if no LLM is active.
+        """
        return self.strategy.active_service

    async def run_inference(self, context: LLMContext) -> Optional[str]:
@@ -74,3 +96,22 @@ class LLMSwitcher(ServiceSwitcher[StrategyType]):
                start_callback=start_callback,
                cancel_on_interruption=cancel_on_interruption,
            )
+
+    def register_direct_function(
+        self,
+        handler: DirectFunction,
+        *,
+        cancel_on_interruption: bool = True,
+    ):
+        """Register a direct function handler for LLM function calls, on all LLMs, active or not.
+
+        Args:
+            handler: The direct function to register. Must follow DirectFunction protocol.
+            cancel_on_interruption: Whether to cancel this function call when an
+                interruption occurs. Defaults to True.
+        """
+        for llm in self.llms:
+            llm.register_direct_function(
+                handler=handler,
+                cancel_on_interruption=cancel_on_interruption,
+            )
--- a/src/pipecat/pipeline/runner.py
+++ b/src/pipecat/pipeline/runner.py
@@ -70,11 +70,15 @@ class PipelineRunner(BaseObject):
        """
        logger.debug(f"Runner {self} started running {task}")
        self._tasks[task.name] = task
-        params = PipelineTaskParams(loop=self._loop)
+
+        # PipelineTask handles asyncio.CancelledError to shutdown the pipeline
+        # properly and re-raises it in case there's more cleanup to do.
        try:
+            params = PipelineTaskParams(loop=self._loop)
            await task.run(params)
        except asyncio.CancelledError:
-            await self._cancel()
+            pass
+
        del self._tasks[task.name]

        # Cleanup base object.
--- a/src/pipecat/pipeline/service_switcher.py
+++ b/src/pipecat/pipeline/service_switcher.py
@@ -21,10 +21,22 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor


 class ServiceSwitcherStrategy:
-    """Base class for service switching strategies."""
+    """Base class for service switching strategies.
+
+    Note:
+        Strategy classes are instantiated internally by ServiceSwitcher.
+        Developers should pass the strategy class (not an instance) to ServiceSwitcher.
+    """

    def __init__(self, services: List[FrameProcessor]):
-        """Initialize the service switcher strategy with a list of services."""
+        """Initialize the service switcher strategy with a list of services.
+
+        Note:
+            This is called internally by ServiceSwitcher. Do not instantiate directly.
+
+        Args:
+            services: List of frame processors to switch between.
+        """
        self.services = services
        self.active_service: Optional[FrameProcessor] = None

@@ -46,10 +58,24 @@ class ServiceSwitcherStrategyManual(ServiceSwitcherStrategy):

    This strategy allows the user to manually select which service is active.
    The initial active service is the first one in the list.
+
+    Example::
+
+        stt_switcher = ServiceSwitcher(
+            services=[stt_1, stt_2],
+            strategy_type=ServiceSwitcherStrategyManual
+        )
    """

    def __init__(self, services: List[FrameProcessor]):
-        """Initialize the manual service switcher strategy with a list of services."""
+        """Initialize the manual service switcher strategy with a list of services.
+
+        Note:
+            This is called internally by ServiceSwitcher. Do not instantiate directly.
+
+        Args:
+            services: List of frame processors to switch between.
+        """
        super().__init__(services)
        self.active_service = services[0] if services else None

@@ -85,7 +111,12 @@ class ServiceSwitcher(ParallelPipeline, Generic[StrategyType]):
    """A pipeline that switches between different services at runtime."""

    def __init__(self, services: List[FrameProcessor], strategy_type: Type[StrategyType]):
-        """Initialize the service switcher with a list of services and a switching strategy."""
+        """Initialize the service switcher with a list of services and a switching strategy.
+
+        Args:
+            services: List of frame processors to switch between.
+            strategy_type: The strategy class to use for switching between services.
+        """
        strategy = strategy_type(services)
        super().__init__(*self._make_pipeline_definitions(services, strategy))
        self.services = services
@@ -100,14 +131,20 @@ class ServiceSwitcher(ParallelPipeline, Generic[StrategyType]):
            active_service: FrameProcessor,
            direction: FrameDirection,
        ):
-            """Initialize the service switcher filter with a strategy and direction."""
+            """Initialize the service switcher filter with a strategy and direction.
+
+            Args:
+                wrapped_service: The service that this filter wraps.
+                active_service: The currently active service.
+                direction: The direction of frame flow to filter.
+            """
+            self._wrapped_service = wrapped_service
+            self._active_service = active_service

            async def filter(_: Frame) -> bool:
                return self._wrapped_service == self._active_service

-            super().__init__(filter, direction)
-            self._wrapped_service = wrapped_service
-            self._active_service = active_service
+            super().__init__(filter, direction, filter_system_frames=True)

        async def process_frame(self, frame, direction):
            """Process a frame through the filter, handling special internal filter-updating frames."""
--- a/src/pipecat/pipeline/task.py
+++ b/src/pipecat/pipeline/task.py
@@ -12,9 +12,10 @@ including heartbeats, idle detection, and observer integration.
 """

 import asyncio
-import time
-from collections import deque
-from typing import Any, AsyncIterable, Deque, Dict, Iterable, List, Optional, Tuple, Type
+import importlib.util
+import os
+from pathlib import Path
+from typing import Any, AsyncIterable, Dict, Iterable, List, Optional, Tuple, Type

 from loguru import logger
 from pydantic import BaseModel, ConfigDict, Field
@@ -31,7 +32,6 @@ from pipecat.frames.frames import (
    ErrorFrame,
    Frame,
    HeartbeatFrame,
-    InputAudioRawFrame,
    InterruptionFrame,
    InterruptionTaskFrame,
    MetricsFrame,
@@ -41,7 +41,7 @@ from pipecat.frames.frames import (
    UserSpeakingFrame,
 )
 from pipecat.metrics.metrics import ProcessingMetricsData, TTFBMetricsData
-from pipecat.observers.base_observer import BaseObserver
+from pipecat.observers.base_observer import BaseObserver, FramePushed
 from pipecat.observers.turn_tracking_observer import TurnTrackingObserver
 from pipecat.pipeline.base_task import BasePipelineTask, PipelineTaskParams
 from pipecat.pipeline.pipeline import Pipeline, PipelineSink, PipelineSource
@@ -59,6 +59,43 @@ IDLE_TIMEOUT_SECS = 300
 CANCEL_TIMEOUT_SECS = 20.0


+class IdleFrameObserver(BaseObserver):
+    """Idle timeout observer.
+
+    This observer waits for specific frames being generated in the pipeline. If
+    the frames are generated the given asyncio event is set. If the event is not
+    set it means the pipeline is probably idle.
+
+    """
+
+    def __init__(self, *, idle_event: asyncio.Event, idle_timeout_frames: Tuple[Type[Frame], ...]):
+        """Initialize the observer.
+
+        Args:
+            idle_event: The event to set if the idle timeout frames are being pushed.
+            idle_timeout_frames: A tuple with the frames that should set the event when received
+        """
+        super().__init__()
+        self._idle_event = idle_event
+        self._idle_timeout_frames = idle_timeout_frames
+        self._processed_frames = set()
+
+    async def on_push_frame(self, data: FramePushed):
+        """Callback executed when a frame is pushed in the pipeline.
+
+        Args:
+            data: The frame push event data.
+        """
+        # Skip already processed frames
+        if data.frame.id in self._processed_frames:
+            return
+
+        self._processed_frames.add(data.frame.id)
+
+        if isinstance(data.frame, StartFrame) or isinstance(data.frame, self._idle_timeout_frames):
+            self._idle_event.set()
+
+
 class PipelineParams(BaseModel):
    """Configuration parameters for pipeline execution.

@@ -132,12 +169,16 @@ class PipelineTask(BasePipelineTask):

    - on_pipeline_finished: Called after the pipeline has reached any terminal state.
          This includes:
+
              - StopFrame: pipeline was stopped (processors keep connections open)
              - EndFrame: pipeline ended normally
              - CancelFrame: pipeline was cancelled
+
          Use this event for cleanup, logging, or post-processing tasks. Users can inspect
          the frame if they need to handle specific cases.

+    - on_pipeline_error: Called when an error occurs with ErrorFrame
+
    Example::

        @task.event_handler("on_frame_reached_upstream")
@@ -148,9 +189,17 @@ class PipelineTask(BasePipelineTask):
        async def on_pipeline_idle_timeout(task):
            ...

+        @task.event_handler("on_pipeline_started")
+        async def on_pipeline_started(task, frame):
+            ...
+
        @task.event_handler("on_pipeline_finished")
        async def on_pipeline_finished(task, frame):
            ...
+
+        @task.event_handler("on_pipeline_error")
+        async def on_pipeline_error(task, frame):
+            ...
    """

    def __init__(
@@ -205,7 +254,6 @@ class PipelineTask(BasePipelineTask):
        self._conversation_id = conversation_id
        self._enable_tracing = enable_tracing and is_tracing_available()
        self._enable_turn_tracking = enable_turn_tracking
-        self._idle_timeout_frames = idle_timeout_frames
        self._idle_timeout_secs = idle_timeout_secs
        if self._params.observers:
            import warnings
@@ -240,16 +288,24 @@ class PipelineTask(BasePipelineTask):
        # This queue is the queue used to push frames to the pipeline.
        self._push_queue = asyncio.Queue()
        self._process_push_task: Optional[asyncio.Task] = None
+
        # This is the heartbeat queue. When a heartbeat frame is received in the
        # down queue we add it to the heartbeat queue for processing.
        self._heartbeat_queue = asyncio.Queue()
        self._heartbeat_push_task: Optional[asyncio.Task] = None
        self._heartbeat_monitor_task: Optional[asyncio.Task] = None
-        # This is the idle queue. When frames are received downstream they are
-        # put in the queue. If no frame is received the pipeline is considered
-        # idle.
-        self._idle_queue = asyncio.Queue()
+
+        # This is the idle event. When selected frames are pushed from any
+        # processor we consider the pipeline is not idle. We use an observer
+        # which will be listening any part of the pipeline.
+        self._idle_event = asyncio.Event()
        self._idle_monitor_task: Optional[asyncio.Task] = None
+        if self._idle_timeout_secs:
+            idle_frame_observer = IdleFrameObserver(
+                idle_event=self._idle_event,
+                idle_timeout_frames=idle_timeout_frames,
+            )
+            observers.append(idle_frame_observer)

        # This event is used to indicate the StartFrame has been received at the
        # end of the pipeline.
@@ -259,6 +315,9 @@ class PipelineTask(BasePipelineTask):
        # StopFrame) has been received at the end of the pipeline.
        self._pipeline_end_event = asyncio.Event()

+        # This event is set when the pipeline truly finishes.
+        self._pipeline_finished_event = asyncio.Event()
+
        # This is the final pipeline. It is composed of a source processor,
        # followed by the user pipeline, and ending with a sink processor. The
        # source allows us to receive and react to upstream frames, and the sink
@@ -288,6 +347,7 @@ class PipelineTask(BasePipelineTask):
        self._register_event_handler("on_pipeline_ended")
        self._register_event_handler("on_pipeline_cancelled")
        self._register_event_handler("on_pipeline_finished")
+        self._register_event_handler("on_pipeline_error")

    @property
    def params(self) -> PipelineParams:
@@ -389,13 +449,14 @@ class PipelineTask(BasePipelineTask):
        logger.debug(f"Task {self} scheduled to stop when done")
        await self.queue_frame(EndFrame())

-    async def cancel(self):
-        """Immediately stop the running pipeline.
+    async def cancel(self, *, reason: Optional[str] = None):
+        """Request the running pipeline to cancel.

-        Cancels all running tasks and stops frame processing without
-        waiting for completion.
+        Args:
+            reason: Optional reason to indicate why the pipeline is being cancelled.
        """
-        await self._cancel()
+        if not self._finished:
+            await self._cancel(reason=reason)

    async def run(self, params: PipelineTaskParams):
        """Start and manage the pipeline execution until completion or cancellation.
@@ -405,51 +466,38 @@ class PipelineTask(BasePipelineTask):
        """
        if self.has_finished():
            return
-        cleanup_pipeline = True
+
+        # Setup processors.
+        await self._setup(params)
+
+        # Create all main tasks and wait for the main push task. This is the
+        # task that pushes frames to the very beginning of our pipeline (i.e. to
+        # our controlled source processor).
+        await self._create_tasks()
+
        try:
-            # Setup processors.
-            await self._setup(params)
-
-            # Create all main tasks and wait of the main push task. This is the
-            # task that pushes frames to the very beginning of our pipeline (our
-            # controlled source processor).
-            push_task = await self._create_tasks()
-            await push_task
-
-            # We have already cleaned up the pipeline inside the task.
-            cleanup_pipeline = False
-
-            # Pipeline has finished nicely.
-            self._finished = True
+            # Wait for pipeline to finish.
+            await self._wait_for_pipeline_finished()
        except asyncio.CancelledError:
-            # Raise exception back to the pipeline runner so it can cancel this
-            # task properly.
+            logger.debug(f"Pipeline task {self} got cancelled from outside...")
+            # We have been cancelled from outside, let's just cancel everything.
+            await self._cancel()
+            # Wait again for pipeline to finish. This time we have really
+            # cancelled, so it should really finish.
+            await self._wait_for_pipeline_finished()
+            # Re-raise in case there's more cleanup to do.
            raise
        finally:
            # We can reach this point for different reasons:
            #
-            # 1. The task has finished properly (e.g. `EndFrame`).
-            # 2. By calling `PipelineTask.cancel()`.
-            # 3. By asyncio task cancellation.
-            #
-            # Case (1) will execute the code below without issues because
-            # `self._finished` is true.
-            #
-            # Case (2) will execute the code below without issues because
-            # `self._cancelled` is true.
-            #
-            # Case (3) will raise the exception above (because we are cancelling
-            # the asyncio task). This will be then captured by the
-            # `PipelineRunner` which will call `PipelineTask.cancel()` and
-            # therefore becoming case (2).
-            if self._finished or self._cancelled:
-                logger.debug(f"Pipeline task {self} is finishing cleanup...")
-                await self._cancel_tasks()
-                await self._cleanup(cleanup_pipeline)
-                if self._check_dangling_tasks:
-                    self._print_dangling_tasks()
-                self._finished = True
-                logger.debug(f"Pipeline task {self} has finished")
+            # 1. The pipeline task has finished (try case).
+            # 2. By an asyncio task cancellation (except case).
+            logger.debug(f"Pipeline task {self} is finishing...")
+            await self._cancel_tasks()
+            if self._check_dangling_tasks:
+                self._print_dangling_tasks()
+            self._finished = True
+            logger.debug(f"Pipeline task {self} has finished")

    async def queue_frame(self, frame: Frame):
        """Queue a single frame to be pushed down the pipeline.
@@ -472,24 +520,16 @@ class PipelineTask(BasePipelineTask):
            for frame in frames:
                await self.queue_frame(frame)

-    async def _cancel(self):
-        """Internal cancellation logic for the pipeline task."""
+    async def _cancel(self, *, reason: Optional[str] = None):
+        """Internal cancellation logic for the pipeline task.
+
+        Args:
+            reason: Optional reason to indicate why the pipeline is being cancelled.
+        """
        if not self._cancelled:
            logger.debug(f"Cancelling pipeline task {self}")
            self._cancelled = True
-            cancel_frame = CancelFrame()
-            # Make sure everything is cleaned up downstream. This is sent
-            # out-of-band from the main streaming task which is what we want since
-            # we want to cancel right away.
-            await self._pipeline.queue_frame(cancel_frame)
-            # Wait for CancelFrame to make it through the pipeline.
-            await self._wait_for_pipeline_end(cancel_frame)
-            # Only cancel the push task, we don't want to be able to process any
-            # other frame after cancel. Everything else will be cancelled in
-            # run().
-            if self._process_push_task:
-                await self._task_manager.cancel_task(self._process_push_task)
-                self._process_push_task = None
+            await self.queue_frame(CancelFrame(reason=reason))

    async def _create_tasks(self):
        """Create and start all pipeline processing tasks."""
@@ -544,7 +584,7 @@ class PipelineTask(BasePipelineTask):

    async def _maybe_cancel_idle_task(self):
        """Cancel idle monitoring task if it is running."""
-        if self._idle_timeout_secs and self._idle_monitor_task:
+        if self._idle_monitor_task:
            await self._task_manager.cancel_task(self._idle_monitor_task)
            self._idle_monitor_task = None

@@ -591,8 +631,22 @@ class PipelineTask(BasePipelineTask):

        self._pipeline_end_event.clear()

+        # We are really done.
+        self._pipeline_finished_event.set()
+
+    async def _wait_for_pipeline_finished(self):
+        await self._pipeline_finished_event.wait()
+        self._pipeline_finished_event.clear()
+        # Make sure we wait for the main task to complete.
+        if self._process_push_task:
+            await self._process_push_task
+            self._process_push_task = None
+
    async def _setup(self, params: PipelineTaskParams):
        """Set up the pipeline task and all processors."""
+        # Load additional observers.
+        await self._load_observer_files()
+
        mgr_params = TaskManagerParams(loop=params.loop)
        self._task_manager.setup(mgr_params)

@@ -676,11 +730,11 @@ class PipelineTask(BasePipelineTask):
        if isinstance(frame, EndTaskFrame):
            # Tell the task we should end nicely.
            logger.debug(f"{self}: received end task frame {frame}")
-            await self.queue_frame(EndFrame())
+            await self.queue_frame(EndFrame(reason=frame.reason))
        elif isinstance(frame, CancelTaskFrame):
            # Tell the task we should end right away.
            logger.debug(f"{self}: received cancel task frame {frame}")
-            await self.queue_frame(CancelFrame())
+            await self.queue_frame(CancelFrame(reason=frame.reason))
        elif isinstance(frame, StopTaskFrame):
            # Tell the task we should stop nicely.
            logger.debug(f"{self}: received stop task frame {frame}")
@@ -693,12 +747,11 @@ class PipelineTask(BasePipelineTask):
            logger.debug(f"{self}: received interruption task frame {frame}")
            await self._pipeline.queue_frame(InterruptionFrame())
        elif isinstance(frame, ErrorFrame):
+            await self._call_event_handler("on_pipeline_error", frame)
            if frame.fatal:
                logger.error(f"A fatal error occurred: {frame}")
                # Cancel all tasks downstream.
                await self.queue_frame(CancelFrame())
-                # Tell the task we should stop.
-                await self.queue_frame(StopTaskFrame())
            else:
                logger.warning(f"{self}: Something went wrong: {frame}")

@@ -710,10 +763,6 @@ class PipelineTask(BasePipelineTask):
        processors have handled the EndFrame and therefore we can exit the task
        cleanly.
        """
-        # Queue received frame to the idle queue so we can monitor idle
-        # pipelines.
-        await self._idle_queue.put(frame)
-
        if isinstance(frame, self._reached_downstream_types):
            await self._call_event_handler("on_frame_reached_downstream", frame)

@@ -776,33 +825,10 @@ class PipelineTask(BasePipelineTask):
        Note: Heartbeats are excluded from idle detection.
        """
        running = True
-        last_frame_time = 0
-
        while running:
            try:
-                frame = await asyncio.wait_for(
-                    self._idle_queue.get(), timeout=self._idle_timeout_secs
-                )
-
-                if isinstance(frame, StartFrame) or isinstance(frame, self._idle_timeout_frames):
-                    # If we find a StartFrame or one of the frames that prevents a
-                    # time out we update the time.
-                    last_frame_time = time.time()
-                else:
-                    # If we find any other frame we check if the pipeline is
-                    # idle by checking the last time we received one of the
-                    # valid frames.
-                    diff_time = time.time() - last_frame_time
-                    if diff_time >= self._idle_timeout_secs:
-                        running = await self._idle_timeout_detected()
-                        # Reset `last_frame_time` so we don't trigger another
-                        # immediate idle timeout if we are not cancelling. For
-                        # example, we might want to force the bot to say goodbye
-                        # and then clean nicely with an `EndFrame`.
-                        last_frame_time = time.time()
-
-                self._idle_queue.task_done()
-
+                await asyncio.wait_for(self._idle_event.wait(), timeout=self._idle_timeout_secs)
+                self._idle_event.clear()
            except asyncio.TimeoutError:
                running = await self._idle_timeout_detected()

@@ -814,7 +840,7 @@ class PipelineTask(BasePipelineTask):
        """
        # If we are cancelling, just exit the task.
        if self._cancelled:
-            return True
+            return False

        logger.warning("Idle timeout detected.")
        await self._call_event_handler("on_idle_timeout")
@@ -824,6 +850,27 @@ class PipelineTask(BasePipelineTask):
            return False
        return True

+    async def _load_observer_files(self):
+        observer_files = os.environ.get("PIPECAT_OBSERVER_FILES", "").split(":")
+        for f in observer_files:
+            try:
+                path = Path(f).resolve()
+                module_name = path.stem
+                spec = importlib.util.spec_from_file_location(module_name, str(path))
+                if spec:
+                    logger.debug(f"{self} loading observers from {path}")
+
+                    # Load module.
+                    module = importlib.util.module_from_spec(spec)
+                    spec.loader.exec_module(module)
+
+                    # Create observers.
+                    observers = await module.create_observers(self)
+                    for observer in observers:
+                        self.add_observer(observer)
+            except Exception as e:
+                logger.error(f"{self} error loading external observers from {f}: {e}")
+
    def _print_dangling_tasks(self):
        """Log any dangling tasks that haven't been properly cleaned up."""
        tasks = [t.get_name() for t in self._task_manager.current_tasks()]
--- a/Show More
+++ b/Show More