example: 49-oi-instrumentation.py

temp - for local dev
Merge pull request #3175 from pipecat-ai/pk/thinking-exploration
2025-12-11 20:47:12 -06:00 · 2025-12-11 20:46:50 -06:00 · 2025-12-11 17:15:37 -05:00 · 2025-12-11 17:05:09 -05:00 · 2025-12-11 15:58:48 -05:00 · 2025-12-11 15:07:35 -05:00
174 changed files with 6408 additions and 2686 deletions
--- a/.github/workflows/generate-changelog.yml
+++ b/.github/workflows/generate-changelog.yml
@@ -0,0 +1,174 @@
+name: Generate Changelog for Release
+
+on:
+  workflow_dispatch:
+    inputs:
+      version:
+        description: "Release version (e.g., 0.0.97)"
+        required: true
+        type: string
+      date:
+        description: "Release date (YYYY-MM-DD format, defaults to today)"
+        required: false
+        type: string
+        default: ""
+
+permissions:
+  contents: write
+  pull-requests: write
+
+jobs:
+  generate-changelog:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          enable-cache: true
+
+      - name: Install dependencies
+        run: |
+          uv sync --group dev
+
+      - name: Set release date
+        id: set_date
+        run: |
+          if [ -z "${{ inputs.date }}" ]; then
+            RELEASE_DATE=$(date +%Y-%m-%d)
+            echo "Using today's date: $RELEASE_DATE"
+          else
+            RELEASE_DATE="${{ inputs.date }}"
+            echo "Using provided date: $RELEASE_DATE"
+          fi
+          echo "release_date=$RELEASE_DATE" >> $GITHUB_OUTPUT
+
+      - name: Validate inputs
+        run: |
+          # Validate version format (basic check)
+          if ! [[ "${{ inputs.version }}" =~ ^[0-9]+\.[0-9]+\.[0-9]+.*$ ]]; then
+            echo "Error: Version must be in format X.Y.Z (e.g., 0.0.97)"
+            exit 1
+          fi
+
+          # Validate date format if provided
+          if [ -n "${{ inputs.date }}" ]; then
+            if ! date -d "${{ inputs.date }}" >/dev/null 2>&1; then
+              # Try macOS date format
+              if ! date -j -f "%Y-%m-%d" "${{ inputs.date }}" >/dev/null 2>&1; then
+                echo "Error: Date must be in YYYY-MM-DD format (e.g., 2025-12-04)"
+                exit 1
+              fi
+            fi
+          fi
+
+      - name: Check for changelog fragments
+        id: check_fragments
+        run: |
+          FRAGMENT_COUNT=$(find changelog -name "*.md" ! -name "_template.md.j2" | wc -l | tr -d ' ')
+          echo "fragment_count=$FRAGMENT_COUNT" >> $GITHUB_OUTPUT
+
+          if [ "$FRAGMENT_COUNT" -eq "0" ]; then
+            echo "❌ Error: No changelog fragments found in changelog/"
+            echo ""
+            echo "Cannot create a release without changelog entries."
+            echo "Add changelog fragments to the changelog/ directory (e.g., 1234.added.md) and try again."
+            exit 1
+          fi
+
+          # Validate fragment types
+          VALID_TYPES="added changed deprecated removed fixed security"
+          INVALID_FRAGMENTS=""
+
+          for file in changelog/*.md; do
+            # Skip template
+            if [[ "$file" == "changelog/_template.md.j2" ]]; then
+              continue
+            fi
+            
+            # Extract type from filename (e.g., 1234.added.md -> added)
+            filename=$(basename "$file")
+            # Handle both 1234.added.md and 1234.added.2.md patterns
+            type=$(echo "$filename" | sed -E 's/^[0-9]+\.([a-z]+)(\.[0-9]+)?\.md$/\1/')
+            
+            # Check if type is valid
+            if ! echo "$VALID_TYPES" | grep -wq "$type"; then
+              INVALID_FRAGMENTS="$INVALID_FRAGMENTS\n  - $filename (type: '$type')"
+            fi
+          done
+
+          if [ -n "$INVALID_FRAGMENTS" ]; then
+            echo "❌ Error: Invalid changelog fragment types found:"
+            echo -e "$INVALID_FRAGMENTS"
+            echo ""
+            echo "Valid types are: $VALID_TYPES"
+            echo "Example: 1234.added.md, 5678.fixed.md"
+            exit 1
+          fi
+
+          echo "✓ Found $FRAGMENT_COUNT changelog fragment(s)"
+          echo "has_fragments=true" >> $GITHUB_OUTPUT
+
+      - name: Preview changelog
+        run: |
+          echo "## Preview of changelog for version ${{ inputs.version }}"
+          echo ""
+          uv run towncrier build --draft --version "${{ inputs.version }}" --date "${{ steps.set_date.outputs.release_date }}"
+
+      - name: Build changelog
+        run: |
+          uv run towncrier build --version "${{ inputs.version }}" --date "${{ steps.set_date.outputs.release_date }}" --yes
+
+      - name: Create Pull Request
+        uses: peter-evans/create-pull-request@v7
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          commit-message: "Update changelog for version ${{ inputs.version }}"
+          title: "Release ${{ inputs.version }} - Changelog Update"
+          body: |
+            ## Changelog Update for Release ${{ inputs.version }}
+
+            This PR updates the CHANGELOG.md with all changes for version **${{ inputs.version }}**.
+
+            ### Summary
+            - **Version:** ${{ inputs.version }}
+            - **Date:** ${{ steps.set_date.outputs.release_date }}
+            - **Fragments processed:** ${{ steps.check_fragments.outputs.fragment_count }}
+
+            ### What this PR does
+            - ✅ Adds new release section to CHANGELOG.md
+            - ✅ Removes processed changelog fragments
+            - ✅ Ready to merge for release
+
+            ### Next Steps
+            1. Review the changelog entries below
+            2. Make any necessary edits to CHANGELOG.md if needed
+            3. Merge this PR
+            4. Continue with your release process
+
+            ---
+
+            <details>
+            <summary>📋 Preview of changes</summary>
+
+            The changelog has been updated with entries from the following fragments:
+
+            ```bash
+            ${{ steps.check_fragments.outputs.fragment_count }} fragments processed
+            ```
+
+            </details>
+          branch: changelog-${{ inputs.version }}
+          delete-branch: true
+          labels: |
+            changelog
+            release
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,10 +5,145 @@ All notable changes to **Pipecat** will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

-## [Unreleased]
+<!-- towncrier release notes start -->
+
+## [0.0.97] - 2025-12-05

 ### Added

+- Added new Gradium services, `GradiumSTTService` and `GradiumTTSService`, for
+  speech-to-text and text-to-speech functionality using Gradium's API.
+
+- Additions for `AsyncAITTSService` and `AsyncAIHttpTTSService`:
+
+  - Added new `languages`: `pt`, `nl`, `ar`, `ru`, `ro`, `ja`, `he`, `hy`,
+    `tr`, `hi`, `zh`.
+  - Updated the default model to `asyncflow_multilingual_v1.0` for improved
+    accuracy and broader language coverage.
+
+- Added optional tool and tool output filters for MCP services.
+
+### Changed
+
+- Updated Deepgram logging to include Deepgram request IDs for improved
+  debugging.
+
+- Text Aggregation Improvements:
+
+  - **Breaking Change**: `BaseTextAggregator.aggregate()` now returns
+    `AsyncIterator[Aggregation]` instead of `Optional[Aggregation]`. This
+    enables the aggregator to return multiple results based on the provided
+    text.
+  - Refactored text aggregators to use inheritance: `SkipTagsAggregator` and
+    `PatternPairAggregator` now inherit from `SimpleTextAggregator`, reusing
+    the base class's sentence detection logic.
+
+- Improved interruption handling to prevent bots from repeating themselves. LLM
+  services that return multiple sentences in a single response (e.g.,
+  `GoogleLLMService`) are now split into individual sentences before being sent
+  to TTS. This ensures interruptions occur at sentence boundaries, preventing
+  the bot from repeating content after being interrupted during long responses.
+
+- Updated `AICFilter` to use Quail STT as the default model
+  (`AICModelType.QUAIL_STT`). Quail STT is optimized for human-to-machine
+  interaction (e.g., voice agents, speech-to-text) and operates at a native
+  sample rate of 16 kHz with fixed enhancement parameters.
+
+- If an unexpected exception is caught, or if `FrameProcessor.push_error()` is
+  called with an exception, the file name and line number where the exception
+  occurred are now logged.
+
+- Updated Smart Turn model weights to v3.1.
+
+- Smart Turn analyzer now uses the full context of the turn rather than just
+  the audio since VAD last triggered.
+
+- Updated `CartesiaSTTService` to return the full transcription `result` in the
+  `TranscriptionFrame` and `InterimTranscriptionFrame`. This provides access to
+  word timestamp data.
+
+- `HumeTTSService` changes:
+
+  - Added tracking headers (`X-Hume-Client-Name` and `X-Hume-Client-Version`)
+    to all requests made by `HumeTTSService` to the Hume API for better usage
+    tracking and analytics.
+  - Added `stop()` and `cancel()` cleanup methods to `HumeTTSService` to
+    properly close the HTTP client and prevent resource leaks.
+
+### Deprecated
+
+- NVIDIA Services name changes (all functionality is unchanged):
+
+  - `NimLLMService` is now deprecated, use `NvidiaLLMService` instead.
+  - `RivaSTTService` is now deprecated, use `NvidiaSTTService` instead.
+  - `RivaTTSService` is now deprecated, use `NvidiaTTSService` instead.
+  - Use `uv pip install pipecat-ai[nvidia]` instead of
+    `uv pip install pipecat-ai[riva]`
+
+- The `noise_gate_enable` parameter in `AICFilter` is deprecated and no longer
+  has any effect. Noise gating is now handled automatically by the AIC VAD
+  system. Use `AICFilter.create_vad_analyzer()` for VAD functionality instead.
+
+- Package `pipecat.sync` is deprecated, use `pipecat.utils.sync` instead.
+
+### Fixed
+
+- Fixed bug in `PatternPairAggregator` where pattern handlers could be called
+  multiple times for `KEEP` or `AGGREGATE` patterns.
+
+- Fixed sentence aggregation to correctly handle ambiguous punctuation in
+  streaming text, such as currency ("$29.95") and abbreviations ("Mr. Smith").
+
+- Fixed an issue in `AWSTranscribeSTTService` where the `region` arg was always
+  set to `us-east-1` when providing an AWS_REGION env var.
+
+- Fixed an issue in `SarvamTTSService` where the last sentence was not being
+  spoken. Now, audio is flushed when the TTS service receives the
+  `LLMFullResponseEndFrame` or `EndFrame`.
+
+- Fixed an issue in `DeepgramTTSService` where a `TTSStoppedFrame` was
+  incorrectly pushed after a function call. This caused an issue with the
+  voice-ui-kit's conversational panel rendering of the LLM output after a
+  function call.
+
+- Fixed an issue where `LLMTextFrame.skip_tts` was being overwritten by LLM
+  services.
+
+- Fixed an issue that caused `WebsocketService` instances to attempt
+  reconnection during shutdown.
+
+- Fixed an issue in `ElevenLabsTTSService` where character usage metrics were
+  only reported on the first TTS generation per turn.
+
+## [0.0.96] - 2025-11-26 🦃 "Happy Thanksgiving!" 🦃
+
+### Added
+
+- Added `AWSBedrockAgentCoreProcessor` to support invoking an AgentCore-hosted
+  agent in a Pipecat pipeline.
+
+- Enhanced error handling across the framework:
+
+  - Added `on_error` callback to `FrameProcessor` for centralized error
+    handling.
+
+  - Renamed `push_error(error: ErrorFrame)` to `push_error_frame(error: ErrorFrame)`
+    for clarity.
+
+  - Added new `push_error` method for simplified error reporting:
+
+    ```python
+    async def push_error(error_msg: str,
+                         exception: Optional[Exception] = None,
+                         fatal: bool = False)
+    ```
+
+  - Standardized error logging by replacing `logger.exception` calls with
+    `logger.error` throughout the codebase.
+
+- Added `cache_read_input_tokens`, `cache_creation_input_tokens` and
+  `reasoning_tokens` to OTel spans for LLM calls.
+
 - Added `LiveKitRESTHelper` utility class for managing LiveKit rooms via REST API.

 - Added `DeepgramSageMakerSTTService` which connects to a SageMaker hosted
@@ -88,8 +223,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

  - Added new emotions: calm and fluent

+- Added `enable_logging` to `SimliVideoService` input parameters. It's disabled
+  by default.
+
 ### Changed

+- Updated `FishAudioTTSService` default model to `s1`.
+
+- Updated `DeepgramTTSService` to use Deepgram's TTS websocket API. ⚠️ This is
+  a potential breaking change, which only affects you if you're self-hosting
+  `DeepgramTTSService`. The new service uses Websockets and improves TTFB
+  latency.
+
 - Updated `daily-python` to 0.22.0.

 - `BaseTextAggregator` changes:
@@ -247,6 +392,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Fixed

+- Fixed an issue in `AWSBedrockLLMService` where the `aws_region` arg was
+  always set to `us-east-1` when providing an AWS_REGION env var.
+
+- Fixed an issue with `DeepgramFluxSTTService` where it sometimes failed to reconnect.
+
 - Fixed an issue in `ElevenLabsRealtimeSTTService` where dynamic language
  updates were not working.

--- a/COMMUNITY_INTEGRATIONS.md
+++ b/COMMUNITY_INTEGRATIONS.md
@@ -79,7 +79,7 @@ Once your PR is submitted, post in the `#community-integrations` Discord channel

 **Examples:**

- [RivaSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/riva/stt.py)
+- [NvidiaSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/nvidia/stt.py)
 - [FalSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/fal/stt.py)

 #### Key requirements:
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -17,24 +17,121 @@ We welcome contributions of all kinds! Your help is appreciated. Follow these st
   git checkout -b your-branch-name
   ```
 4. **Make your changes**: Edit or add files as necessary.
-5. **Test your changes**: Ensure that your changes look correct and follow the style set in the codebase.
-6. **Commit your changes**: Once you're satisfied with your changes, commit them with a meaningful message.
+5. **Add a changelog entry**: Create a changelog fragment file (see [Changelog Entries](#changelog-entries) below).
+6. **Test your changes**: Ensure that your changes look correct and follow the style set in the codebase.
+7. **Commit your changes**: Once you're satisfied with your changes, commit them with a meaningful message.

 ```bash
 git commit -m "Description of your changes"
 ```

-7. **Push your changes**: Push your branch to your forked repository.
+8. **Push your changes**: Push your branch to your forked repository.

 ```bash
 git push origin your-branch-name
 ```

-8. **Submit a Pull Request (PR)**: Open a PR from your forked repository to the main branch of this repo.
+9. **Submit a Pull Request (PR)**: Open a PR from your forked repository to the main branch of this repo.
   > Important: Describe the changes you've made clearly!

 Our maintainers will review your PR, and once everything is good, your contributions will be merged!

+## Changelog Entries
+
+Every pull request that makes a user-facing change should include a changelog entry. We use a changelog fragment system to avoid merge conflicts.
+
+### Creating a Changelog Fragment
+
+1. Create a new file in the `changelog/` directory with this naming pattern:
+
+   ```
+   <PR_number>.<type>.md
+   ```
+
+2. Choose the appropriate type:
+
+   - `added.md` - New features
+   - `changed.md` - Changes in existing functionality
+   - `deprecated.md` - Soon-to-be removed features
+   - `removed.md` - Removed features
+   - `fixed.md` - Bug fixes
+   - `security.md` - Security fixes
+
+3. Write your changelog entry as a Markdown bullet point. Include the `-` at the start:
+
+**Example files:**
+
+`changelog/1234.added.md`:
+
+```markdown
+- Added support for Anthropic Claude 3.5 Sonnet with improved streaming performance.
+```
+
+`changelog/5678.fixed.md`:
+
+```markdown
+- Fixed an issue where audio frames were dropped during high-load scenarios.
+```
+
+**For entries with nested bullets:**
+
+`changelog/1234.changed.md`:
+
+```markdown
+- Updated service configuration:
+
+  - Changed default timeout to 30 seconds
+  - Added retry logic for failed connections
+```
+
+### Multiple Changes in One PR
+
+**Different types of changes:** Create separate fragment files for each type:
+
+```
+changelog/1234.added.md
+changelog/1234.fixed.md
+```
+
+**Multiple changes of the same type:** Create numbered fragment files:
+
+```
+changelog/1234.changed.md
+changelog/1234.changed.2.md
+```
+
+**Related changes:** Use nested bullets in a single fragment:
+
+```markdown
+- Updated service configuration:
+
+  - Changed default timeout to 30 seconds
+  - Added retry logic for failed connections
+```
+
+**Rule of thumb:** One logical change per fragment file. If changes are unrelated, use separate files.
+
+### Preview Your Changes
+
+To see what your changelog entry will look like:
+
+```bash
+towncrier build --draft --version Unreleased
+```
+
+This won't modify any files, just show you a preview.
+
+### When to Skip Changelog Entries
+
+You can skip adding a changelog entry for:
+
+- Documentation-only changes
+- Internal refactoring with no user-facing impact
+- Test-only changes
+- CI/build configuration changes
+
+If you're unsure whether your change needs a changelog entry, ask in your PR!
+
 ## Dependency Management

 This project uses [uv](https://docs.astral.sh/uv/) for dependency management. The `uv.lock` file is committed to ensure reproducible builds.
--- a/README.md
+++ b/README.md
@@ -3,7 +3,6 @@
 </div></h1>

 [![PyPI](https://img.shields.io/pypi/v/pipecat-ai)](https://pypi.org/project/pipecat-ai) ![Tests](https://github.com/pipecat-ai/pipecat/actions/workflows/tests.yaml/badge.svg) [![codecov](https://codecov.io/gh/pipecat-ai/pipecat/graph/badge.svg?token=LNVUIVO4Y9)](https://codecov.io/gh/pipecat-ai/pipecat) [![Docs](https://img.shields.io/badge/Documentation-blue)](https://docs.pipecat.ai) [![Discord](https://img.shields.io/discord/1239284677165056021)](https://discord.gg/pipecat) [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/pipecat-ai/pipecat)
-[![](https://getmanta.ai/api/badges?text=Manta%20Graph&link=manta)](https://getmanta.ai/pipecat)

 # 🎙️ Pipecat: Real-Time Voice & Multimodal AI Agents

@@ -74,9 +73,9 @@ Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.yout

 | Category            | Services                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  |
 | ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| Speech-to-Text      | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper)                                                                                                                                                                                          |
+| Speech-to-Text      | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper)                                                                                                                                                                                          |
 | LLMs                | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova) [Together AI](https://docs.pipecat.ai/server/services/llm/together)                                                                                                                                                                                                                              |
-| Text-to-Speech      | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
+| Text-to-Speech      | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
 | Speech-to-Speech    | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
 | Transport           | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
 | Serializers         | [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         |
--- a/changelog/3085.added.md
+++ b/changelog/3085.added.md
@@ -0,0 +1,2 @@
+- Added `RimeNonJsonTTSService` which supports non-JSON streaming mode. This new class supports websocket streaming for the Arcana model.
+
--- a/changelog/3175.added.md
+++ b/changelog/3175.added.md
@@ -0,0 +1,41 @@
+- Added additional functionality related to "thinking", for Google and Anthropic
+  LLMs.
+
+  1. New typed parameters for Google and Anthropic LLMs that control the
+     models' thinking behavior (like how much thinking to do, and whether to
+     output thoughts or thought summaries):
+     - `AnthropicLLMService.ThinkingConfig`
+     - `GoogleLLMService.ThinkingConfig`
+  2. New frames for representing thoughts output by LLMs:
+     - `LLMThoughtStartFrame`
+     - `LLMThoughtTextFrame`
+     - `LLMThoughtEndFrame`
+  3. A mechanism for appending arbitrary context messages after a function call
+     message, used specifically to support Google's function-call-related
+     "thought signatures", which are necessary to ensure thinking continuity
+     between function calls in a chain (where the model thinks, makes a function
+     call, thinks some more, etc.). See:
+     - `append_extra_context_messages` field in `FunctionCallInProgressFrame` and
+       helper types
+     - `GoogleLLMService` leveraging the new mechanism to add a Google-specific
+       `"fn_thought_signature"` message
+     - `LLMAssistantAggregator` handling of `append_extra_context_messages`
+     - `GeminiLLMAdapter` handling of `"fn_thought_signature"` messages
+  4. A generic mechanism for recording LLM thoughts to context, used
+     specifically to support Anthropic, whose thought signatures are expected to
+     appear alongside the text of the thoughts within assistant context
+     messages. See:
+     - `LLMThoughtEndFrame.signature`
+     - `LLMAssistantAggregator` handling of the above field
+     - `AnthropicLLMAdapter` handling of `"thought"` context messages
+  5. Google-specific logic for inserting non-function-call-related thought
+     signatures into the context, to help maintain thinking continuity in a
+     chain of LLM calls. See:
+     - `GoogleLLMService` sending `LLMMessagesAppendFrame`s to add LLM-specific
+       `"non_fn_thought_signature"` messages to context
+     - `GeminiLLMAdapter` handling of `"non_fn_thought_signature"` messages
+  6. An expansion of `TranscriptProcessor` to process LLM thoughts in addition
+     to user and assistant utterances. See:
+     - `TranscriptProcessor(process_thoughts=True)` (defaults to `False`)
+     - `ThoughtTranscriptionMessage`, which is now also emitted with the
+       `"on_transcript_update"` event
--- a/changelog/3189.added.md
+++ b/changelog/3189.added.md
@@ -0,0 +1,6 @@
+- Data and control frames can now be marked as non-interruptible by using the
+  `UninterruptibleFrame` mixin. Frames marked as `UninterruptibleFrame` will not
+  be interrupted during processing, and any queued frames of this type will be
+  retained in the internal queues. This is useful when you need ordered frames
+  (data or control) that should not be discarded or cancelled due to
+  interruptions.
--- a/changelog/3189.changed.md
+++ b/changelog/3189.changed.md
@@ -0,0 +1,3 @@
+- `FunctionCallInProgressFrame` and `FunctionCallResultFrame` have changed from
+  system frames to a control frame and a data frame, respectively, and are now
+  both marked as `UninterruptibleFrame`.
--- a/changelog/3206.changed.md
+++ b/changelog/3206.changed.md
@@ -0,0 +1,3 @@
+- `UserBotLatencyLogObserver` now uses `VADUserStartedSpeakingFrame` and
+  `VADUserStoppedSpeakingFrame` to determine latency from user stopped speaking
+  to bot started speaking.
--- a/changelog/3207.added.md
+++ b/changelog/3207.added.md
@@ -0,0 +1 @@
+- Added `on_conversation_detected` event to `VoicemailDetector`.
--- a/changelog/3208.added.md
+++ b/changelog/3208.added.md
@@ -0,0 +1 @@
+- Added `x-goog-api-client` header with Pipecat's version to all Google services' requests.
--- a/changelog/3212.added.md
+++ b/changelog/3212.added.md
@@ -0,0 +1,6 @@
+- Added to `AWSNovaSonicLLMService` functionality related to the new (and now
+  default) Nova 2 Sonic model (`"amazon.nova-2-sonic-v1:0"`):
+  - Added the `endpointing_sensitivity` parameter to control how quickly the
+    model decides the user has stopped speaking.
+  - Made the assistant-response-trigger hack a no-op. It's only needed for the
+    older Nova Sonic model.
--- a/changelog/3212.changed.md
+++ b/changelog/3212.changed.md
@@ -0,0 +1 @@
+- Made `"amazon.nova-2-sonic-v1:0"` the new default model for `AWSNovaSonicLLMService`.
--- a/changelog/3212.fixed.md
+++ b/changelog/3212.fixed.md
@@ -0,0 +1,2 @@
+- Fixed a bug in `AWSNovaSonicLLMService` where we would mishandle cancelled
+  tool calls in the context, resulting in errors.
--- a/changelog/3219.deprecated.md
+++ b/changelog/3219.deprecated.md
@@ -0,0 +1 @@
+- `FalSmartTurnAnalyzer` and `LocalSmartTurnAnalyzer` are deprecated and will be removed in a future version. Use `LocalSmartTurnAnalyzerV3` instead.
--- a/changelog/_template.md.j2
+++ b/changelog/_template.md.j2
@@ -0,0 +1,16 @@
+{% for section, _ in sections.items() %}
+{% if sections[section] %}
+{% for category, val in definitions.items() if category in sections[section]%}
+### {{ definitions[category]['name'] }}
+
+{% for text, values in sections[section][category].items() %}
+{{ text }}
+
+{% endfor %}
+{% endfor %}
+{% else %}
+No significant changes.
+
+{% endif %}
+{% endfor %}
+
--- a/docs/TURN_AWARE_TRANSCRIPT_PROCESSOR.md
+++ b/docs/TURN_AWARE_TRANSCRIPT_PROCESSOR.md
@@ -1,103 +0,0 @@
-# TurnAwareTranscriptProcessor Example
-
-## Overview
-
-The `TurnAwareTranscriptProcessor` combines user and assistant transcript tracking with turn boundary detection. It correctly handles interruptions by only capturing what was actually spoken.
-
-## Basic Usage
-
-```python
-from pipecat.processors.transcript_processor import TurnAwareTranscriptProcessor
-
-# Create the processor
-turn_processor = TurnAwareTranscriptProcessor()
-
-# Register event handlers
-@turn_processor.event_handler("on_turn_started")
-async def handle_turn_started(processor, turn_number):
-    print(f"Turn {turn_number} started")
-
-@turn_processor.event_handler("on_turn_ended")
-async def handle_turn_ended(processor, turn_number, user_text, assistant_text, was_interrupted):
-    print(f"\nTurn {turn_number} ended:")
-    print(f"  User said: {user_text}")
-    print(f"  Assistant said: {assistant_text}")
-    print(f"  Was interrupted: {was_interrupted}")
-
-@turn_processor.event_handler("on_transcript_update")
-async def handle_transcript_update(processor, frame):
-    for msg in frame.messages:
-        print(f"[{msg.role}]: {msg.content}")
-
-# Add to pipeline
-pipeline = Pipeline([
-    transport.input(),
-    stt,
-    turn_processor,  # Process transcripts and track turns
-    context_aggregator.user(),
-    llm,
-    tts,
-    transport.output(),
-    context_aggregator.assistant(),
-])
-```
-
-## Features
-
-1. **Turn Boundary Detection**: Automatically detects when turns start and end based on user and bot speaking patterns
-2. **Interruption Handling**: Correctly captures only what was actually spoken when interruptions occur
-3. **Real-time Transcripts**: Emits transcript messages for both user and assistant speech
-4. **Turn Events**: Provides start/end events with accumulated transcripts for each turn
-
-## Events
-
-### on_turn_started
-Emitted when a new turn begins (user starts speaking).
-
-**Handler signature**: `async def handler(processor, turn_number)`
-
-### on_turn_ended
-Emitted when a turn ends with accumulated transcripts.
-
-**Handler signature**: `async def handler(processor, turn_number, user_transcript, assistant_transcript, was_interrupted)`
-
-### on_transcript_update  
-Inherited from `BaseTranscriptProcessor`, emitted for individual transcript messages.
-
-**Handler signature**: `async def handler(processor, frame)`
-
-## Turn Logic
-
- Turns start when the user begins speaking (`UserStartedSpeakingFrame`)
- Turns end when:
-  - The user starts speaking again (previous turn ends, new turn starts)
-  - The bot is interrupted (`InterruptionFrame`)
-  - The pipeline ends (`EndFrame`/`CancelFrame`)
-
-## Integration with OpenTelemetry
-
-You can use turn events to enrich OpenTelemetry spans:
-
-```python
-from pipecat.utils.tracing.turn_trace_observer import TurnTraceObserver
-
-turn_tracker = TurnTrackingObserver()
-turn_tracer = TurnTraceObserver(turn_tracker)
-turn_processor = TurnAwareTranscriptProcessor()
-
-@turn_processor.event_handler("on_turn_ended")
-async def add_transcripts_to_span(processor, turn_number, user_text, assistant_text, interrupted):
-    # Get current span and add transcript data
-    from opentelemetry import trace
-    current_span = trace.get_current_span()
-    if current_span:
-        current_span.set_attribute("turn.user_text", user_text)
-        current_span.set_attribute("turn.assistant_text", assistant_text)
-```
-
-## Notes
-
- The processor handles async frame processing correctly by delaying turn end until frames are processed
- Works with word-level timestamps from TTS services like Cartesia
- Accumulates both user (`TranscriptionFrame`) and assistant (`TTSTextFrame`) speech
- Emits individual transcript messages in addition to turn-level aggregation
--- a/docs/api/conf.py
+++ b/docs/api/conf.py
@@ -119,7 +119,6 @@ def import_core_modules():
        "pipecat.observers",
        "pipecat.runner",
        "pipecat.serializers",
-        "pipecat.sync",
        "pipecat.transcriptions",
        "pipecat.utils",
    ]
--- a/docs/api/index.rst
+++ b/docs/api/index.rst
@@ -30,7 +30,6 @@ Quick Links
   Runner <api/pipecat.runner>
   Serializers <api/pipecat.serializers>
   Services <api/pipecat.services>
-   Sync <api/pipecat.sync>
   Transcriptions <api/pipecat.transcriptions>
   Transports <api/pipecat.transports>
-   Utils <api/pipecat.utils>
+   Utils <api/pipecat.utils>
--- a/env.example
+++ b/env.example
@@ -73,6 +73,9 @@ GOOGLE_CLOUD_PROJECT_ID=...
 GOOGLE_CLOUD_LOCATION=...
 GOOGLE_TEST_CREDENTIALS=...

+# Gradium
+GRADIUM_API_KEY=...
+
 # Grok
 GROK_API_KEY=...

@@ -191,4 +194,4 @@ TWILIO_AUTH_TOKEN=...
 WHATSAPP_TOKEN=...
 WHATSAPP_WEBHOOK_VERIFICATION_TOKEN=...
 WHATSAPP_PHONE_NUMBER_ID=...
-WHATSAPP_APP_SECRET=...
+WHATSAPP_APP_SECRET=...
--- a/examples/foundational/01c-nvidia-riva-tts.py
+++ b/examples/foundational/01c-nvidia-riva-tts.py
@@ -15,7 +15,7 @@ from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineTask
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.riva.tts import FastPitchTTSService
+from pipecat.services.nvidia.tts import NvidiaTTSService
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
@@ -36,7 +36,7 @@ transport_params = {
 async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info(f"Starting bot")

-    tts = FastPitchTTSService(api_key=os.getenv("NVIDIA_API_KEY"))
+    tts = NvidiaTTSService(api_key=os.getenv("NVIDIA_API_KEY"))

    task = PipelineTask(
        Pipeline([tts, transport.output()]),
--- a/examples/foundational/07af-interruptible-gradium.py
+++ b/examples/foundational/07af-interruptible-gradium.py
@@ -0,0 +1,127 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import LLMRunFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.gradium.stt import GradiumSTTService
+from pipecat.services.gradium.tts import GradiumTTSService
+from pipecat.services.openai.llm import OpenAILLMService
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    stt = GradiumSTTService(api_key=os.getenv("GRADIUM_API_KEY"))
+
+    tts = GradiumTTSService(
+        api_key=os.getenv("GRADIUM_API_KEY"),
+        voice_id="YTpq7expH9539ERJ",
+    )
+
+    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    context = LLMContext(messages)
+    context_aggregator = LLMContextAggregatorPair(context)
+
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            stt,
+            context_aggregator.user(),  # User responses
+            llm,  # LLM
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+            context_aggregator.assistant(),  # Assistant spoken responses
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+        # Kick off the conversation.
+        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+        await task.queue_frames([LLMRunFrame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/07n-interruptible-gemini.py
+++ b/examples/foundational/07n-interruptible-gemini.py
@@ -136,7 +136,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        messages.append(
            {
                "role": "system",
-                "content": "Hello! I'm your AI assistant. I can help you with a variety of tasks. What would you like to know?",
+                "content": "You are an AI assistant. You can help with a variety of tasks. Introduce yourself and ask the user what they would like to know.",
            }
        )
        await task.queue_frames([LLMRunFrame()])
--- a/examples/foundational/07n-interruptible-google-http.py
+++ b/examples/foundational/07n-interruptible-google-http.py
@@ -75,8 +75,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    llm = GoogleLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        model="gemini-2.5-flash",
-        # turn on thinking if you want it
-        # params=GoogleLLMService.InputParams(extra={"thinking_config": {"thinking_budget": 4096}}),)
+        # force a certain amount of thinking if you want it
+        # params=GoogleLLMService.InputParams(
+        #     thinking=GoogleLLMService.ThinkingConfig(thinking_budget=4096)
+        # ),
    )

    messages = [
--- a/examples/foundational/07n-interruptible-google.py
+++ b/examples/foundational/07n-interruptible-google.py
@@ -75,8 +75,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    llm = GoogleLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        model="gemini-2.5-flash",
-        # turn on thinking if you want it
-        # params=GoogleLLMService.InputParams(extra={"thinking_config": {"thinking_budget": 4096}}),)
+        # force a certain amount of thinking if you want it
+        # params=GoogleLLMService.InputParams(
+        #     thinking=GoogleLLMService.ThinkingConfig(thinking_budget=4096)
+        # ),
    )

    messages = [
--- a/examples/foundational/07r-interruptible-riva-nim.py
+++ b/examples/foundational/07r-interruptible-riva-nim.py
@@ -22,9 +22,9 @@ from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.nim.llm import NimLLMService
-from pipecat.services.riva.stt import RivaSTTService
-from pipecat.services.riva.tts import RivaTTSService
+from pipecat.services.nvidia.llm import NvidiaLLMService
+from pipecat.services.nvidia.stt import NvidiaSTTService
+from pipecat.services.nvidia.tts import NvidiaTTSService
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
@@ -59,11 +59,13 @@ transport_params = {
 async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info(f"Starting bot")

-    stt = RivaSTTService(api_key=os.getenv("NVIDIA_API_KEY"))
+    stt = NvidiaSTTService(api_key=os.getenv("NVIDIA_API_KEY"))

-    llm = NimLLMService(api_key=os.getenv("NVIDIA_API_KEY"), model="meta/llama-3.1-405b-instruct")
+    llm = NvidiaLLMService(
+        api_key=os.getenv("NVIDIA_API_KEY"), model="meta/llama-3.1-405b-instruct"
+    )

-    tts = RivaTTSService(api_key=os.getenv("NVIDIA_API_KEY"))
+    tts = NvidiaTTSService(api_key=os.getenv("NVIDIA_API_KEY"))

    messages = [
        {
--- a/examples/foundational/07s-interruptible-google-audio-in.py
+++ b/examples/foundational/07s-interruptible-google-audio-in.py
@@ -224,8 +224,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    llm = GoogleLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        model="gemini-2.5-flash",
-        # turn on thinking if you want it
-        # params=GoogleLLMService.InputParams(extra={"thinking_config": {"thinking_budget": 4096}}),
+        # force a certain amount of thinking if you want it
+        # params=GoogleLLMService.InputParams(
+        #     thinking=GoogleLLMService.ThinkingConfig(thinking_budget=4096)
+        # ),
    )

    tts = GoogleTTSService(
--- a/examples/foundational/14i-function-calling-fireworks.py
+++ b/examples/foundational/14i-function-calling-fireworks.py
@@ -76,7 +76,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    llm = FireworksLLMService(
        api_key=os.getenv("FIREWORKS_API_KEY"),
-        model="accounts/fireworks/models/llama-v3p1-405b-instruct",
+        model="accounts/fireworks/models/gpt-oss-20b",
    )
    # You can also register a function_name of None to get all functions
    # sent to the same callback with an additional function_name parameter.
--- a/examples/foundational/14j-function-calling-nvidia.py
+++ b/examples/foundational/14j-function-calling-nvidia.py
@@ -27,7 +27,7 @@ from pipecat.runner.utils import create_transport
 from pipecat.services.cartesia.tts import CartesiaTTSService
 from pipecat.services.deepgram.stt import DeepgramSTTService
 from pipecat.services.llm_service import FunctionCallParams
-from pipecat.services.nim.llm import NimLLMService
+from pipecat.services.nvidia.llm import NvidiaLLMService
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
@@ -75,11 +75,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        # text_filters=[MarkdownTextFilter()],
    )

-    llm = NimLLMService(
+    llm = NvidiaLLMService(
        api_key=os.getenv("NVIDIA_API_KEY"),
        model="nvidia/llama-3.3-nemotron-super-49b-v1.5",
        # Recommended when turning thinking off
-        params=NimLLMService.InputParams(temperature=0.0),
+        params=NvidiaLLMService.InputParams(temperature=0.0),
    )
    # You can also register a function_name of None to get all functions
    # sent to the same callback with an additional function_name parameter.
--- a/examples/foundational/19-openai-realtime.py
+++ b/examples/foundational/19-openai-realtime.py
@@ -14,20 +14,13 @@ from loguru import logger

 from pipecat.adapters.schemas.function_schema import FunctionSchema
 from pipecat.adapters.schemas.tools_schema import ToolsSchema
-from pipecat.adapters.services.open_ai_realtime_adapter import OpenAIRealtimeLLMAdapter
 from pipecat.audio.vad.silero import SileroVADAnalyzer
-from pipecat.frames.frames import (
-    LLMRunFrame,
-    LLMSetToolsFrame,
-    LLMUpdateSettingsFrame,
-    TranscriptionMessage,
-)
+from pipecat.frames.frames import LLMRunFrame, LLMSetToolsFrame, TranscriptionMessage
 from pipecat.observers.loggers.transcription_log_observer import TranscriptionLogObserver
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
 from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.processors.transcript_processor import TranscriptProcessor
 from pipecat.runner.types import RunnerArguments
--- a/examples/foundational/19a-azure-realtime.py
+++ b/examples/foundational/19a-azure-realtime.py
@@ -19,7 +19,6 @@ from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
 from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
--- a/examples/foundational/22-natural-conversation.py
+++ b/examples/foundational/22-natural-conversation.py
@@ -28,10 +28,10 @@ from pipecat.services.cartesia.tts import CartesiaTTSService
 from pipecat.services.deepgram.stt import DeepgramSTTService
 from pipecat.services.llm_service import LLMService
 from pipecat.services.openai.llm import OpenAIContextAggregatorPair, OpenAILLMService
-from pipecat.sync.event_notifier import EventNotifier
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+from pipecat.utils.sync.event_notifier import EventNotifier

 load_dotenv(override=True)

--- a/examples/foundational/22b-natural-conversation-proposal.py
+++ b/examples/foundational/22b-natural-conversation-proposal.py
@@ -45,11 +45,11 @@ from pipecat.services.cartesia.tts import CartesiaTTSService
 from pipecat.services.deepgram.stt import DeepgramSTTService
 from pipecat.services.llm_service import FunctionCallParams, LLMService
 from pipecat.services.openai.llm import OpenAILLMService
-from pipecat.sync.base_notifier import BaseNotifier
-from pipecat.sync.event_notifier import EventNotifier
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+from pipecat.utils.sync.base_notifier import BaseNotifier
+from pipecat.utils.sync.event_notifier import EventNotifier
 from pipecat.utils.time import time_now_iso8601

 load_dotenv(override=True)
--- a/examples/foundational/22c-natural-conversation-mixed-llms.py
+++ b/examples/foundational/22c-natural-conversation-mixed-llms.py
@@ -46,11 +46,11 @@ from pipecat.services.cartesia.tts import CartesiaTTSService
 from pipecat.services.deepgram.stt import DeepgramSTTService
 from pipecat.services.llm_service import FunctionCallParams, LLMService
 from pipecat.services.openai.llm import OpenAILLMService
-from pipecat.sync.base_notifier import BaseNotifier
-from pipecat.sync.event_notifier import EventNotifier
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+from pipecat.utils.sync.base_notifier import BaseNotifier
+from pipecat.utils.sync.event_notifier import EventNotifier
 from pipecat.utils.time import time_now_iso8601

 load_dotenv(override=True)
--- a/examples/foundational/22d-natural-conversation-gemini-audio.py
+++ b/examples/foundational/22d-natural-conversation-gemini-audio.py
@@ -47,11 +47,11 @@ from pipecat.runner.utils import create_transport
 from pipecat.services.cartesia.tts import CartesiaTTSService
 from pipecat.services.google.llm import GoogleLLMService
 from pipecat.services.llm_service import LLMService
-from pipecat.sync.base_notifier import BaseNotifier
-from pipecat.sync.event_notifier import EventNotifier
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+from pipecat.utils.sync.base_notifier import BaseNotifier
+from pipecat.utils.sync.event_notifier import EventNotifier
 from pipecat.utils.time import time_now_iso8601

 load_dotenv(override=True)
--- a/examples/foundational/39-mcp-stdio.py
+++ b/examples/foundational/39-mcp-stdio.py
@@ -64,11 +64,14 @@ class UrlToImageProcessor(FrameProcessor):
            await self.push_frame(frame, direction)

    def extract_url(self, text: str):
-        data = json.loads(text)
-        if "artObject" in data:
-            return data["artObject"]["webImage"]["url"]
-        if "artworks" in data and len(data["artworks"]):
-            return data["artworks"][0]["webImage"]["url"]
+        try:
+            data = json.loads(text)
+            if "artObject" in data:
+                return data["artObject"]["webImage"]["url"]
+            if "artworks" in data and len(data["artworks"]):
+                return data["artworks"][0]["webImage"]["url"]
+        except:
+            pass

        return None

@@ -88,6 +91,23 @@ class UrlToImageProcessor(FrameProcessor):
            logger.error(error_msg)


+# full list of tools available from rijksmuseum MCP:
+# - get_artwork_details
+# - get_artwork_image
+# - get_user_sets
+# - get_user_set_details
+# - open_image_in_browser
+# - get_artist_timeline
+
+mcp_tools_filter = ["get_artwork_details", "get_artwork_image", "open_image_in_browser"]
+
+
+def open_image_output_filter(output: str):
+    pattern = r"Successfully opened image in browser: "
+    text_to_print = re.sub(pattern, "", output)
+    print(f"🖼️ link to high resolution artwork: {text_to_print}")
+
+
 # We store functions so objects (e.g. SileroVADAnalyzer) don't get
 # instantiated. The function will be called when the desired transport gets
 # selected.
@@ -136,7 +156,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
                    # https://github.com/r-huijts/rijksmuseum-mcp
                    args=["-y", "mcp-server-rijksmuseum"],
                    env={"RIJKSMUSEUM_API_KEY": os.getenv("RIJKSMUSEUM_API_KEY")},
-                )
+                ),
+                # Optional
+                tools_filter=mcp_tools_filter,  # Optional
+                tools_output_filters={"open_image_in_browser": open_image_output_filter},
            )
        except Exception as e:
            logger.error(f"error setting up mcp")
--- a/examples/foundational/39c-multiple-mcp.py
+++ b/examples/foundational/39c-multiple-mcp.py
@@ -67,13 +67,14 @@ class UrlToImageProcessor(FrameProcessor):
            await self.push_frame(frame, direction)

    def extract_url(self, text: str):
-        data = json.loads(text)
-        if "artObject" in data:
-            return data["artObject"]["webImage"]["url"]
-        if "artworks" in data and len(data["artworks"]):
-            return data["artworks"][0]["webImage"]["url"]
-
-        return None
+        try:
+            data = json.loads(text)
+            if "artObject" in data:
+                return data["artObject"]["webImage"]["url"]
+            if "artworks" in data and len(data["artworks"]):
+                return data["artworks"][0]["webImage"]["url"]
+        except:
+            pass

    async def run_image_process(self, image_url: str):
        try:
--- a/examples/foundational/40-aws-nova-sonic.py
+++ b/examples/foundational/40-aws-nova-sonic.py
@@ -5,7 +5,9 @@
 #


+import asyncio
 import os
+import random
 from datetime import datetime

 from dotenv import load_dotenv
@@ -33,11 +35,21 @@ load_dotenv(override=True)


 async def fetch_weather_from_api(params: FunctionCallParams):
-    temperature = 75 if params.arguments["format"] == "fahrenheit" else 24
+    temperature = (
+        random.randint(60, 85)
+        if params.arguments["format"] == "fahrenheit"
+        else random.randint(15, 30)
+    )
+    # Simulate a long network delay.
+    # You can continue chatting while waiting for this to complete.
+    # With Nova 2 Sonic (the default model), the assistant will respond
+    # appropriately once the function call is complete.
+    await asyncio.sleep(5)
    await params.result_callback(
        {
            "conditions": "nice",
            "temperature": temperature,
+            "location": params.arguments["location"],
            "format": params.arguments["format"],
            "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"),
        }
@@ -91,23 +103,31 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info(f"Starting bot")

    # Specify initial system instruction.
-    # HACK: note that, for now, we need to inject a special bit of text into this instruction to
-    # allow the first assistant response to be programmatically triggered (which happens in the
-    # on_client_connected handler, below)
    system_instruction = (
        "You are a friendly assistant. The user and you will engage in a spoken dialog exchanging "
        "the transcripts of a natural real-time conversation. Keep your responses short, generally "
-        "two or three sentences for chatty scenarios. "
-        f"{AWSNovaSonicLLMService.AWAIT_TRIGGER_ASSISTANT_RESPONSE_INSTRUCTION}"
+        "two or three sentences for chatty scenarios."
+        # HACK: if using the older Nova Sonic (pre-2) model, note that you need to inject a special
+        # bit of text into this instruction to allow the first assistant response to be
+        # programmatically triggered (which happens in the on_client_connected handler)
+        # f"{AWSNovaSonicLLMService.AWAIT_TRIGGER_ASSISTANT_RESPONSE_INSTRUCTION}"
    )

    # Create the AWS Nova Sonic LLM service
    llm = AWSNovaSonicLLMService(
        secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
        access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
-        region=os.getenv("AWS_REGION"),  # as of 2025-05-06, us-east-1 is the only supported region
+        # as of 2025-12-09, these are the supported regions:
+        # - Nova 2 Sonic (the default model):
+        #   - us-east-1
+        #   - us-west-2
+        #   - ap-northeast-1
+        # - Nova Sonic (the older model):
+        #   - us-east-1
+        #   - ap-northeast-1
+        region=os.getenv("AWS_REGION"),
        session_token=os.getenv("AWS_SESSION_TOKEN"),
-        voice_id="tiffany",  # matthew, tiffany, amy
+        voice_id="tiffany",
        # you could choose to pass instruction here rather than via context
        # system_instruction=system_instruction
        # you could choose to pass tools here rather than via context
@@ -117,7 +137,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    # Register function for function calls
    # you can either register a single function for all function calls, or specific functions
    # llm.register_function(None, fetch_weather_from_api)
-    llm.register_function("get_current_weather", fetch_weather_from_api)
+    llm.register_function(
+        "get_current_weather", fetch_weather_from_api, cancel_on_interruption=False
+    )

    # Set up context and context management.
    context = LLMContext(
@@ -159,10 +181,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        logger.info(f"Client connected")
        # Kick off the conversation.
        await task.queue_frames([LLMRunFrame()])
-        # HACK: for now, we need this special way of triggering the first assistant response in AWS
-        # Nova Sonic. Note that this trigger requires a special corresponding bit of text in the
-        # system instruction. In the future, simply queueing the context frame should be sufficient.
-        await llm.trigger_assistant_response()
+        # HACK: if using the older Nova Sonic (pre-2) model, you need this special way of
+        # triggering the first assistant response. Note that this trigger requires a special
+        # corresponding bit of text in the system instruction.
+        # await llm.trigger_assistant_response()

    # Handle client disconnection events
    @transport.event_handler("on_client_disconnected")
--- a/examples/foundational/44-voicemail-detection.py
+++ b/examples/foundational/44-voicemail-detection.py
@@ -113,8 +113,12 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        logger.info(f"Client disconnected")
        await task.cancel()

+    @voicemail.event_handler("on_conversation_detected")
+    async def on_conversation_detected(processor):
+        logger.info("Conversation detected!")
+
    @voicemail.event_handler("on_voicemail_detected")
-    async def handle_voicemail(processor):
+    async def on_voicemail_detected(processor):
        logger.info("Voicemail detected! Leaving a message...")

        # Push frames using standard Pipecat pattern
--- a/examples/foundational/49-oi-instrumentation.py
+++ b/examples/foundational/49-oi-instrumentation.py
@@ -0,0 +1,179 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+
+import os
+from datetime import datetime
+
+from arize.otel import register as register_arize
+from dotenv import load_dotenv
+from loguru import logger
+from phoenix.otel import register as register_phoenix
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import LLMRunFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import (
+    LLMContextAggregatorPair,
+)
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.openai.llm import OpenAILLMService
+from pipecat.services.openai.stt import OpenAISTTService
+from pipecat.services.openai.tts import OpenAITTSService
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+from openinference.instrumentation.pipecat import PipecatInstrumentor
+
+load_dotenv(override=True)
+
+conversation_id = f"test-conversation-001_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+debug_log_filename = os.path.join(os.getcwd(), f"pipecat_frames_{conversation_id}.log")
+logger.debug(f"Frame debug log file: {debug_log_filename}")  # loguru rather than print()
+
+def setup_tracer_provider():
+    """Register and return the tracer provider used for instrumentation.
+
+    Uses Arize when ARIZE_SPACE_ID and ARIZE_API_KEY are both set, else Phoenix.
+    """
+    project_name = os.getenv("ARIZE_PROJECT_NAME", "default")
+    space_id = os.getenv("ARIZE_SPACE_ID")
+    api_key = os.getenv("ARIZE_API_KEY")
+    if space_id and api_key:
+        return register_arize(
+            space_id=space_id,
+            api_key=api_key,
+            project_name=project_name,
+        )
+    # Phoenix fallback: honor the configured project name instead of a hard-coded one.
+    return register_phoenix(project_name=project_name)
+
+
+tracer_provider = setup_tracer_provider()  # Arize or Phoenix, chosen from env vars
+PipecatInstrumentor().instrument(  # runs at import time, before run_bot builds a pipeline
+    tracer_provider=tracer_provider,
+    debug_log_filename=debug_log_filename,
+)
+
+transport_params = {  # transport name -> zero-arg factory, so params are only built on demand
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    """Build an OpenAI STT -> LLM -> TTS voice pipeline on `transport` and run it."""
+    logger.info("Starting bot")
+
+    # Speech-to-text.
+    stt = OpenAISTTService(
+        api_key=os.getenv("OPENAI_API_KEY"),
+        model="gpt-4o-transcribe",
+        prompt="Expect normal helpful conversation.",
+    )
+    # Alternative STT: Cartesia (CartesiaSTTService must be imported first).
+    # stt = CartesiaSTTService(api_key=os.getenv("CARTESIA_API_KEY"))
+
+    # Language model.
+    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
+
+    # Text-to-speech.
+    tts = OpenAITTSService(
+        api_key=os.getenv("OPENAI_API_KEY"),
+        voice="ballad",
+        params=OpenAITTSService.InputParams(
+            instructions="Please speak clearly and at a moderate pace."
+        ),
+    )
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. "
+            + "Your goal is to demonstrate your capabilities in a succinct way. "
+            + "Your output will be converted to audio so don't "
+            + "include special characters in your answers. "
+            + "Respond to what the user said in a creative and helpful way.",
+        }
+    ]
+
+    context = LLMContext(messages)
+    context_aggregator = LLMContextAggregatorPair(context)
+
+    # Pipeline: processors in the order frames flow through them.
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            stt,
+            context_aggregator.user(),  # User responses
+            llm,  # LLM
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+            context_aggregator.assistant(),  # Assistant spoken responses
+        ]
+    )
+
+    # Pipeline task; conversation_id is the module-level, per-run ID.
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        conversation_id=conversation_id,  # Use dynamic conversation ID for session tracking
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info("Client connected")
+        # Kick off the conversation: add an intro instruction, then queue an LLMRunFrame.
+        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+        await task.queue_frames([LLMRunFrame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info("Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/49a-thinking-anthropic.py
+++ b/examples/foundational/49a-thinking-anthropic.py
@@ -0,0 +1,161 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import LLMRunFrame, ThoughtTranscriptionMessage, TranscriptionMessage
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.processors.transcript_processor import TranscriptProcessor
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.anthropic.llm import AnthropicLLMService
+from pipecat.services.cartesia.tts import CartesiaTTSService
+from pipecat.services.deepgram.stt import DeepgramSTTService
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+
+    tts = CartesiaTTSService(
+        api_key=os.getenv("CARTESIA_API_KEY"),
+        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
+    )
+
+    llm = AnthropicLLMService(
+        api_key=os.getenv("ANTHROPIC_API_KEY"),
+        params=AnthropicLLMService.InputParams(
+            thinking=AnthropicLLMService.ThinkingConfig(type="enabled", budget_tokens=2048)
+        ),
+    )
+
+    transcript = TranscriptProcessor(process_thoughts=True)
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    context = LLMContext(messages)
+    context_aggregator = LLMContextAggregatorPair(context)
+
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            stt,
+            transcript.user(),  # User transcripts
+            context_aggregator.user(),  # User responses
+            llm,  # LLM
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+            transcript.assistant(),  # Assistant transcripts (including thoughts)
+            context_aggregator.assistant(),  # Assistant spoken responses
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+        # Kick off the conversation.
+        messages.append(
+            {
+                "role": "user",
+                "content": "Say hello briefly.",
+            }
+        )
+        # Here are some example prompts conducive to demonstrating
+        # thinking (picked from Google and Anthropic docs).
+        # messages.append(
+        #     {
+        #         "role": "user",
+        #         "content": "Analogize photosynthesis and growing up. Keep your answer concise.",
+        #         # "content": "Compare and contrast electric cars and hybrid cars."
+        #         # "content": "Are there an infinite number of prime numbers such that n mod 4 == 3?"
+        #     }
+        # )
+        await task.queue_frames([LLMRunFrame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    # Register event handler for transcript updates
+    @transcript.event_handler("on_transcript_update")
+    async def on_transcript_update(processor, frame):
+        for msg in frame.messages:
+            if isinstance(msg, (ThoughtTranscriptionMessage, TranscriptionMessage)):
+                timestamp = f"[{msg.timestamp}] " if msg.timestamp else ""
+                role = "THOUGHT" if isinstance(msg, ThoughtTranscriptionMessage) else msg.role
+                logger.info(f"Transcript: {timestamp}{role}: {msg.content}")
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/49b-thinking-google.py
+++ b/examples/foundational/49b-thinking-google.py
@@ -0,0 +1,166 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import LLMRunFrame, ThoughtTranscriptionMessage, TranscriptionMessage
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.processors.transcript_processor import TranscriptProcessor
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.cartesia.tts import CartesiaTTSService
+from pipecat.services.deepgram.stt import DeepgramSTTService
+from pipecat.services.google.llm import GoogleLLMService
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+
+    tts = CartesiaTTSService(
+        api_key=os.getenv("CARTESIA_API_KEY"),
+        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
+    )
+
+    llm = GoogleLLMService(
+        api_key=os.getenv("GOOGLE_API_KEY"),
+        # model="gemini-3-pro-preview", # A more powerful reasoning model, but slower
+        params=GoogleLLMService.InputParams(
+            thinking=GoogleLLMService.ThinkingConfig(
+                # thinking_level="low", # Use this field instead of thinking_budget for Gemini 3 Pro. Defaults to "high".
+                thinking_budget=-1,  # Dynamic thinking
+                include_thoughts=True,
+            )
+        ),
+    )
+
+    transcript = TranscriptProcessor(process_thoughts=True)
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    context = LLMContext(messages)
+    context_aggregator = LLMContextAggregatorPair(context)
+
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            stt,
+            transcript.user(),  # User transcripts
+            context_aggregator.user(),  # User responses
+            llm,  # LLM
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+            transcript.assistant(),  # Assistant transcripts (including thoughts)
+            context_aggregator.assistant(),  # Assistant spoken responses
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+        # Kick off the conversation.
+        messages.append(
+            {
+                "role": "user",
+                "content": "Say hello briefly.",
+            }
+        )
+        # Here are some example prompts conducive to demonstrating
+        # thinking (picked from Google and Anthropic docs).
+        # messages.append(
+        #     {
+        #         "role": "user",
+        #         "content": "Analogize photosynthesis and growing up. Keep your answer concise.",
+        #         # "content": "Compare and contrast electric cars and hybrid cars."
+        #         # "content": "Are there an infinite number of prime numbers such that n mod 4 == 3?"
+        #     }
+        # )
+        await task.queue_frames([LLMRunFrame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    # Register event handler for transcript updates
+    @transcript.event_handler("on_transcript_update")
+    async def on_transcript_update(processor, frame):
+        for msg in frame.messages:
+            if isinstance(msg, (ThoughtTranscriptionMessage, TranscriptionMessage)):
+                timestamp = f"[{msg.timestamp}] " if msg.timestamp else ""
+                role = "THOUGHT" if isinstance(msg, ThoughtTranscriptionMessage) else msg.role
+                logger.info(f"Transcript: {timestamp}{role}: {msg.content}")
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/49c-thinking-functions-anthropic.py
+++ b/examples/foundational/49c-thinking-functions-anthropic.py
@@ -0,0 +1,185 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.adapters.schemas.tools_schema import ToolsSchema
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import LLMRunFrame, ThoughtTranscriptionMessage, TranscriptionMessage
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.processors.transcript_processor import TranscriptProcessor
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.anthropic.llm import AnthropicLLMService
+from pipecat.services.cartesia.tts import CartesiaTTSService
+from pipecat.services.deepgram.stt import DeepgramSTTService
+from pipecat.services.llm_service import FunctionCallParams
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+
+async def check_flight_status(params: FunctionCallParams, flight_number: str):
+    """Check the status of a flight. Returns status (e.g., "on time", "delayed") and departure time.
+
+    Args:
+        flight_number (str): The flight number, e.g. "AA100".
+    """
+    await params.result_callback({"status": "delayed", "departure_time": "14:30"})
+
+
+async def book_taxi(params: FunctionCallParams, time: str):
+    """Book a taxi for a given time. Returns status (e.g., "done").
+
+    Args:
+        time (str): The time to book the taxi for, e.g. "15:00".
+    """
+    await params.result_callback({"status": "done"})
+
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+
+    tts = CartesiaTTSService(
+        api_key=os.getenv("CARTESIA_API_KEY"),
+        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
+    )
+
+    llm = AnthropicLLMService(
+        api_key=os.getenv("ANTHROPIC_API_KEY"),
+        params=AnthropicLLMService.InputParams(
+            thinking=AnthropicLLMService.ThinkingConfig(type="enabled", budget_tokens=2048)
+        ),
+    )
+
+    llm.register_direct_function(check_flight_status)
+    llm.register_direct_function(book_taxi)
+
+    tools = ToolsSchema(standard_tools=[check_flight_status, book_taxi])
+
+    transcript = TranscriptProcessor(process_thoughts=True)
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    context = LLMContext(messages, tools)
+    context_aggregator = LLMContextAggregatorPair(context)
+
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            stt,
+            transcript.user(),  # User transcripts
+            context_aggregator.user(),  # User responses
+            llm,  # LLM
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+            transcript.assistant(),  # Assistant transcripts (including thoughts)
+            context_aggregator.assistant(),  # Assistant spoken responses
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+        # Kick off the conversation.
+        messages.append(
+            {
+                "role": "user",
+                "content": "Say hello briefly.",
+            }
+        )
+        # Here is an example prompt conducive to demonstrating thinking and
+        # function calling.
+        # This example comes from Gemini docs.
+        # messages.append(
+        #     {
+        #         "role": "user",
+        #         "content": "Check the status of flight AA100 and, if it's delayed, book me a taxi 2 hours before its departure time.",
+        #     }
+        # )
+        await task.queue_frames([LLMRunFrame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    @transcript.event_handler("on_transcript_update")
+    async def on_transcript_update(processor, frame):
+        for msg in frame.messages:
+            if isinstance(msg, (ThoughtTranscriptionMessage, TranscriptionMessage)):
+                timestamp = f"[{msg.timestamp}] " if msg.timestamp else ""
+                role = "THOUGHT" if isinstance(msg, ThoughtTranscriptionMessage) else msg.role
+                logger.info(f"Transcript: {timestamp}{role}: {msg.content}")
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/49d-thinking-functions-google.py
+++ b/examples/foundational/49d-thinking-functions-google.py
@@ -0,0 +1,190 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.adapters.schemas.tools_schema import ToolsSchema
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import LLMRunFrame, ThoughtTranscriptionMessage, TranscriptionMessage
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.processors.transcript_processor import TranscriptProcessor
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.cartesia.tts import CartesiaTTSService
+from pipecat.services.deepgram.stt import DeepgramSTTService
+from pipecat.services.google.llm import GoogleLLMService
+from pipecat.services.llm_service import FunctionCallParams
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+
+async def check_flight_status(params: FunctionCallParams, flight_number: str):
+    """Check the status of a flight. Returns status (e.g., "on time", "delayed") and departure time.
+
+    Args:
+        flight_number (str): The flight number, e.g. "AA100".
+    """
+    # Demo stub: `flight_number` is not consulted; the same fixed result is
+    # reported for every flight.
+    # NOTE(review): this function is handed to the LLM as a direct function, so
+    # the docstring above is presumably used as the tool description — confirm
+    # before rewording it.
+    await params.result_callback({"status": "delayed", "departure_time": "14:30"})
+
+
+async def book_taxi(params: FunctionCallParams, time: str):
+    """Book a taxi for a given time. Returns status (e.g., "done").
+
+    Args:
+        time (str): The time to book the taxi for, e.g. "15:00".
+    """
+    # Demo stub: `time` is not consulted; the booking is always reported as
+    # done.
+    # NOTE(review): this function is handed to the LLM as a direct function, so
+    # the docstring above is presumably used as the tool description — confirm
+    # before rewording it.
+    await params.result_callback({"status": "done"})
+
+
+# Each entry maps a transport name to a zero-argument factory instead of a
+# ready-made params object, so that objects such as SileroVADAnalyzer are not
+# instantiated when this module is loaded. The factory is called only when the
+# desired transport gets selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    """Build and run the thinking + function-calling voice pipeline.
+
+    Sets up Deepgram STT, Cartesia TTS and a Google LLM with thinking enabled,
+    registers the two demo tools, wires everything into a pipeline and runs
+    that pipeline with a ``PipelineRunner``.
+
+    Args:
+        transport: Transport providing the pipeline's input and output.
+        runner_args: Runner arguments; supplies the pipeline idle timeout and
+            the SIGINT-handling flag.
+    """
+    logger.info(f"Starting bot")
+
+    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+
+    tts = CartesiaTTSService(
+        api_key=os.getenv("CARTESIA_API_KEY"),
+        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
+    )
+
+    llm = GoogleLLMService(
+        api_key=os.getenv("GOOGLE_API_KEY"),
+        # model="gemini-3-pro-preview", # A more powerful reasoning model, but slower
+        params=GoogleLLMService.InputParams(
+            thinking=GoogleLLMService.ThinkingConfig(
+                # thinking_level="low", # Use this field instead of thinking_budget for Gemini 3 Pro. Defaults to "high".
+                thinking_budget=-1,  # Dynamic thinking
+                include_thoughts=True,
+            )
+        ),
+    )
+
+    # Register the tool implementations with the LLM service...
+    llm.register_direct_function(check_flight_status)
+    llm.register_direct_function(book_taxi)
+
+    # ...and describe the same functions in the tools schema that is attached
+    # to the LLM context below.
+    tools = ToolsSchema(standard_tools=[check_flight_status, book_taxi])
+
+    # NOTE(review): process_thoughts=True presumably makes the processor report
+    # thought messages as well (on_transcript_update below handles
+    # ThoughtTranscriptionMessage) — confirm against TranscriptProcessor.
+    transcript = TranscriptProcessor(process_thoughts=True)
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    context = LLMContext(messages, tools)
+    context_aggregator = LLMContextAggregatorPair(context)
+
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            stt,
+            transcript.user(),  # User transcripts
+            context_aggregator.user(),  # User responses
+            llm,  # LLM
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+            transcript.assistant(),  # Assistant transcripts (including thoughts)
+            context_aggregator.assistant(),  # Assistant spoken responses
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+        # Kick off the conversation.
+        # `messages` is the same list object that was passed to LLMContext
+        # above, so the appended user message is part of the context when the
+        # LLM run is queued.
+        messages.append(
+            {
+                "role": "user",
+                "content": "Say hello briefly.",
+            }
+        )
+        # Here is an example prompt conducive to demonstrating thinking and
+        # function calling.
+        # This example comes from Gemini docs.
+        # messages.append(
+        #     {
+        #         "role": "user",
+        #         "content": "Check the status of flight AA100 and, if it's delayed, book me a taxi 2 hours before its departure time.",
+        #     }
+        # )
+        await task.queue_frames([LLMRunFrame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        # Cancel the pipeline task once the client has gone away.
+        await task.cancel()
+
+    @transcript.event_handler("on_transcript_update")
+    async def on_transcript_update(processor, frame):
+        # Log every transcript entry; thoughts are labelled "THOUGHT" instead
+        # of using the message's role.
+        for msg in frame.messages:
+            if isinstance(msg, (ThoughtTranscriptionMessage, TranscriptionMessage)):
+                timestamp = f"[{msg.timestamp}] " if msg.timestamp else ""
+                role = "THOUGHT" if isinstance(msg, ThoughtTranscriptionMessage) else msg.role
+                logger.info(f"Transcript: {timestamp}{role}: {msg.content}")
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud.
+
+    Args:
+        runner_args: Runner arguments, used to create the transport and then
+            passed on to ``run_bot``.
+    """
+    # Create the transport from the `transport_params` factories, then run the
+    # bot on it.
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+# Script entry point: the runner's `main` is imported only when this file is
+# executed directly, not when it is imported as a module.
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,6 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "pipecat-ai"
+
 dynamic = ["version"]
 description = "An open source framework for voice (and multimodal) assistants"
 license = "BSD-2-Clause"
@@ -45,7 +46,7 @@ Source = "https://github.com/pipecat-ai/pipecat"
 Website = "https://pipecat.ai"

 [project.optional-dependencies]
-aic = [ "aic-sdk~=1.1.0" ]
+aic = [ "aic-sdk~=1.2.0" ]
 anthropic = [ "anthropic~=0.49.0" ]
 assemblyai = [ "pipecat-ai[websockets-base]" ]
 asyncai = [ "pipecat-ai[websockets-base]" ]
@@ -55,14 +56,15 @@ azure = [ "azure-cognitiveservices-speech~=1.42.0"]
 cartesia = [ "cartesia~=2.0.3", "pipecat-ai[websockets-base]" ]
 cerebras = []
 daily = [ "daily-python~=0.22.0" ]
-deepgram = [ "deepgram-sdk~=4.7.0" ]
+deepgram = [ "deepgram-sdk~=4.7.0", "pipecat-ai[websockets-base]" ]
 deepseek = []
 elevenlabs = [ "pipecat-ai[websockets-base]" ]
 fal = [ "fal-client~=0.5.9" ]
 fireworks = []
 fish = [ "ormsgpack~=1.7.0", "pipecat-ai[websockets-base]" ]
 gladia = [ "pipecat-ai[websockets-base]" ]
-google = [ "google-cloud-speech>=2.33.0,<3", "google-cloud-texttospeech>=2.31.0,<3", "google-genai>=1.41.0,<2", "pipecat-ai[websockets-base]" ]
+google = [ "google-cloud-speech>=2.33.0,<3", "google-cloud-texttospeech>=2.31.0,<3", "google-genai>=1.51.0,<2", "pipecat-ai[websockets-base]" ]
+gradium = [ "pipecat-ai[websockets-base]" ]
 grok = []
 groq = [ "groq~=0.23.0" ]
 gstreamer = [ "pygobject~=3.50.0" ]
@@ -83,8 +85,8 @@ mistral = []
 mlx-whisper = [ "mlx-whisper~=0.4.2" ]
 moondream = [ "accelerate~=1.10.0", "einops~=0.8.0", "pyvips[binary]~=3.0.0", "timm~=1.0.13", "transformers>=4.48.0" ]
 neuphonic = [ "pipecat-ai[websockets-base]" ]
-nim = []
 noisereduce = [ "noisereduce~=3.0.3" ]
+nvidia = [ "nvidia-riva-client~=2.21.1" ]
 openai = [ "pipecat-ai[websockets-base]" ]
 openpipe = [ "openpipe>=4.50.0,<6" ]
 openrouter = []
@@ -93,7 +95,7 @@ playht = [ "pipecat-ai[websockets-base]" ]
 qwen = []
 remote-smart-turn = []
 rime = [ "pipecat-ai[websockets-base]" ]
-riva = [ "nvidia-riva-client~=2.21.1" ]
+riva = [ "pipecat-ai[nvidia]" ]
 runner = [ "python-dotenv>=1.0.0,<2.0.0", "uvicorn>=0.32.0,<1.0.0", "fastapi>=0.115.6,<0.122.0", "pipecat-ai-small-webrtc-prebuilt>=1.0.0"]
 sagemaker = ["aws_sdk_sagemaker_runtime_http2; python_version>='3.12'"]
 sambanova = []
@@ -114,6 +116,20 @@ websocket = [ "pipecat-ai[websockets-base]", "fastapi>=0.115.6,<0.122.0" ]
 websockets-base = [ "websockets>=13.1,<16.0" ]
 whisper = [ "faster-whisper~=1.1.1" ]

+oiip = [
+    "arize-otel>=0.0.1",
+    "arize-phoenix>=0.0.1",
+    "openinference-instrumentation>=0.1.12",
+    "opentelemetry-api",
+    "opentelemetry-sdk",
+    "opentelemetry-semantic-conventions",
+    # Note: openinference-instrumentation-pipecat comes from [tool.uv.sources]
+    "openinference-instrumentation-pipecat"
+]
+
+[tool.uv.sources]
+openinference-instrumentation-pipecat = { path = "../../opensource/openinference/python/instrumentation/openinference-instrumentation-pipecat", editable = true }
+
 [dependency-groups]
 dev = [
    "build~=1.2.2",
@@ -129,6 +145,7 @@ dev = [
    "setuptools~=78.1.1",
    "setuptools_scm~=8.3.1",
    "python-dotenv>=1.0.1,<2.0.0",
+    "towncrier~=25.8.0",
 ]

 docs = [
@@ -159,7 +176,7 @@ where = ["src"]
    "src/pipecat/audio/dtmf/dtmf-star.wav",
 ]
 "pipecat.services.aws_nova_sonic" = ["src/pipecat/services/aws_nova_sonic/ready.wav"]
-"pipecat.audio.turn.smart_turn.data" = ["src/pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx"]
+"pipecat.audio.turn.smart_turn.data" = ["src/pipecat/audio/turn/smart_turn/data/smart-turn-v3.1-cpu.onnx"]

 [tool.pytest.ini_options]
 addopts = "--verbose"
@@ -206,3 +223,44 @@ convention = "google"
 command_line = "--module pytest"
 source = ["src"]
 omit = ["*/tests/*"]
+
+[tool.towncrier]
+package = "pipecat"
+package_dir = "src"
+filename = "CHANGELOG.md"
+directory = "changelog"
+start_string = "<!-- towncrier release notes start -->\n"
+template = "changelog/_template.md.j2"
+title_format = "## [{version}] - {project_date}"
+underlines = ["", "", ""]
+wrap = true
+
+[[tool.towncrier.type]]
+directory = "added"
+name = "Added"
+showcontent = true
+
+[[tool.towncrier.type]]
+directory = "changed"
+name = "Changed"
+showcontent = true
+
+[[tool.towncrier.type]]
+directory = "deprecated"
+name = "Deprecated"
+showcontent = true
+
+[[tool.towncrier.type]]
+directory = "removed"
+name = "Removed"
+showcontent = true
+
+[[tool.towncrier.type]]
+directory = "fixed"
+name = "Fixed"
+showcontent = true
+
+[[tool.towncrier.type]]
+directory = "security"
+name = "Security"
+showcontent = true
--- a/scripts/evals/run-release-evals.py
+++ b/scripts/evals/run-release-evals.py
@@ -74,6 +74,11 @@ EVAL_CONVERSATION = EvalConfig(
    eval_speaks_first=True,
 )

+# Eval used by the thinking + function-calling examples (49c/49d), which
+# expose a flight status tool.
+EVAL_FLIGHT_STATUS = EvalConfig(
+    prompt="Check the status of flight AA100.",
+    eval="The user says something about the status of flight AA100, such as whether it's on time or delayed.",
+)
+

 TESTS_07 = [
    # 07 series
@@ -103,7 +108,7 @@ TESTS_07 = [
    ("07o-interruptible-assemblyai.py", EVAL_SIMPLE_MATH),
    ("07q-interruptible-rime.py", EVAL_SIMPLE_MATH),
    ("07q-interruptible-rime-http.py", EVAL_SIMPLE_MATH),
-    ("07r-interruptible-riva-nim.py", EVAL_SIMPLE_MATH),
+    ("07r-interruptible-nvidia.py", EVAL_SIMPLE_MATH),
    ("07s-interruptible-google-audio-in.py", EVAL_SIMPLE_MATH),
    ("07t-interruptible-fish.py", EVAL_SIMPLE_MATH),
    ("07v-interruptible-neuphonic.py", EVAL_SIMPLE_MATH),
@@ -136,7 +141,7 @@ TESTS_14 = [
    ("14g-function-calling-grok.py", EVAL_WEATHER),
    ("14h-function-calling-azure.py", EVAL_WEATHER),
    ("14i-function-calling-fireworks.py", EVAL_WEATHER),
-    ("14j-function-calling-nim.py", EVAL_WEATHER),
+    ("14j-function-calling-nvidia.py", EVAL_WEATHER),
    ("14k-function-calling-cerebras.py", EVAL_WEATHER),
    ("14m-function-calling-openrouter.py", EVAL_WEATHER),
    ("14n-function-calling-perplexity.py", EVAL_WEATHER),
@@ -204,6 +209,13 @@ TESTS_44 = [
    ("44-voicemail-detection.py", EVAL_CONVERSATION),
 ]

+TESTS_49 = [
+    # 49 series
+    ("49a-thinking-anthropic.py", EVAL_SIMPLE_MATH),
+    ("49b-thinking-google.py", EVAL_SIMPLE_MATH),
+    ("49c-thinking-functions-anthropic.py", EVAL_FLIGHT_STATUS),
+    ("49d-thinking-functions-google.py", EVAL_FLIGHT_STATUS),
+]
+
 TESTS = [
    *TESTS_07,
    *TESTS_12,
@@ -216,6 +228,7 @@ TESTS = [
    *TESTS_40,
    *TESTS_43,
    *TESTS_44,
+    *TESTS_49,
 ]


--- a/src/pipecat/__init__.py
+++ b/src/pipecat/__init__.py
@@ -5,14 +5,20 @@
 #

 import sys
-from importlib.metadata import version
+from importlib.metadata import version as lib_version

 from loguru import logger

-__version__ = version("pipecat-ai")
+__version__ = lib_version("pipecat-ai")

 logger.info(f"ᓚᘏᗢ Pipecat {__version__} (Python {sys.version}) ᓚᘏᗢ")

+
+def version() -> str:
+    """Return the Pipecat version.
+
+    Returns:
+        The module's ``__version__`` string, which is read from the installed
+        ``pipecat-ai`` distribution metadata when the package is imported.
+    """
+    return __version__
+
+
 # We replace `asyncio.wait_for()` for `wait_for2.wait_for()` for Python < 3.12.
 #
 # In Python 3.12, `asyncio.wait_for()` is implemented in terms of
--- a/src/pipecat/adapters/services/anthropic_adapter.py
+++ b/src/pipecat/adapters/services/anthropic_adapter.py
@@ -165,9 +165,44 @@ class AnthropicLLMAdapter(BaseLLMAdapter[AnthropicLLMInvocationParams]):

    def _from_universal_context_message(self, message: LLMContextMessage) -> MessageParam:
        if isinstance(message, LLMSpecificMessage):
-            return copy.deepcopy(message.message)
+            return self._from_anthropic_specific_message(message)
        return self._from_standard_message(message)

+    def _from_anthropic_specific_message(self, message: LLMSpecificMessage) -> MessageParam:
+        """Translate an Anthropic-specific context message into Anthropic format.
+
+        Two shapes are accepted: a special thought record (a dict whose
+        ``type`` is ``"thought"`` and which carries non-empty ``text`` and
+        ``signature`` values), or a message that is already in Anthropic
+        format.
+
+        Args:
+            message: Anthropic-specific message.
+
+        Returns:
+            For a thought record, a standalone "assistant" message holding a
+            single "thinking" block; otherwise a deep copy of the wrapped
+            message.
+        """
+        payload = message.message
+
+        # Thought records become standalone "assistant" messages; later these
+        # thinking messages will be properly merged into the assistant
+        # response messages before the context is sent to Anthropic for the
+        # next turn.
+        if isinstance(payload, dict) and payload.get("type") == "thought":
+            thinking_text = payload.get("text")
+            thinking_signature = payload.get("signature")
+            if thinking_text and thinking_signature:
+                thinking_block = {
+                    "type": "thinking",
+                    "thinking": thinking_text,
+                    "signature": thinking_signature,
+                }
+                return {"role": "assistant", "content": [thinking_block]}
+
+        # Anything else is assumed to already be in Anthropic format.
+        return copy.deepcopy(payload)
+
+
    def _from_standard_message(self, message: LLMStandardMessage) -> MessageParam:
        """Convert standard universal context message to Anthropic format.

--- a/src/pipecat/adapters/services/gemini_adapter.py
+++ b/src/pipecat/adapters/services/gemini_adapter.py
@@ -209,16 +209,55 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
        system_instruction = None
        messages = []
        tool_call_id_to_name_mapping = {}
+        non_fn_thought_signatures = []

-        # Process each message, preserving Google-formatted messages and converting others
+        # Process each message, converting to Google format as needed
        for message in universal_context_messages:
-            result = self._from_universal_context_message(
+            # We have a Google-specific message; this may either be a
+            # thought-signature-containing message that we need to handle in a
+            # special way, or a message already in Google format that we can
+            # use directly
+            if isinstance(message, LLMSpecificMessage):
+                # Special handling for function-call-related thought signature
+                # messages
+                if (
+                    isinstance(message.message, dict)
+                    and message.message.get("type") == "fn_thought_signature"
+                    and (thought_signature := message.message.get("signature"))
+                ):
+                    self._apply_function_thought_signature_to_messages(
+                        thought_signature, message.message.get("tool_call_id"), messages
+                    )
+                    continue
+
+                # Special handling for non-function-call-related thought-
+                # signature-containing messages
+                if (
+                    isinstance(message.message, dict)
+                    and message.message.get("type") == "non_fn_thought_signature"
+                    and (thought_signature := message.message.get("signature"))
+                    and (bookmark := message.message.get("bookmark"))
+                ):
+                    non_fn_thought_signatures.append(
+                        {"signature": thought_signature, "bookmark": bookmark}
+                    )
+                    continue
+
+                # Fall back to assuming that the message is already in Google
+                # format
+                messages.append(message.message)
+                continue
+
+            # We have a standard universal context message; convert it to
+            # Google format
+            result = self._from_standard_message(
                message,
                params=self.MessageConversionParams(
                    already_have_system_instruction=bool(system_instruction),
                    tool_call_id_to_name_mapping=tool_call_id_to_name_mapping,
                ),
            )
+
            # Each result is either a Content or a system instruction
            if result.content:
                messages.append(result.content)
@@ -229,6 +268,10 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
            if result.tool_call_id_to_name_mapping:
                tool_call_id_to_name_mapping.update(result.tool_call_id_to_name_mapping)

+        # Apply non-function-call-related thought signatures to the appropriate
+        # messages
+        self._apply_non_function_thought_signatures_to_messages(non_fn_thought_signatures, messages)
+
        # Check if we only have function-related messages (no regular text)
        has_regular_messages = any(
            len(msg.parts) == 1
@@ -247,13 +290,6 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):

        return self.ConvertedMessages(messages=messages, system_instruction=system_instruction)

-    def _from_universal_context_message(
-        self, message: LLMContextMessage, *, params: MessageConversionParams
-    ) -> MessageConversionResult:
-        if isinstance(message, LLMSpecificMessage):
-            return self.MessageConversionResult(content=message.message)
-        return self._from_standard_message(message, params=params)
-
    def _from_standard_message(
        self, message: LLMStandardMessage, *, params: MessageConversionParams
    ) -> MessageConversionResult:
@@ -410,3 +446,137 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
            content=Content(role=role, parts=parts),
            tool_call_id_to_name_mapping=tool_call_id_to_name_mapping,
        )
+
+    def _apply_function_thought_signature_to_messages(
+        self, thought_signature: bytes, tool_call_id: str, messages: List[Content]
+    ) -> None:
+        """Attach a function-related thought signature to its function call part.
+
+        The messages are scanned from last to first. The first part found whose
+        function call ID equals ``tool_call_id`` receives the signature and the
+        scan stops. Nothing is changed when no such part exists.
+
+        Args:
+            thought_signature: The thought signature bytes to apply.
+            tool_call_id: ID of the tool call message to find and modify.
+            messages: List of messages to search through.
+        """
+        for candidate in reversed(messages):
+            # Skip anything that is not a Content with at least one part.
+            if not (isinstance(candidate, Content) and candidate.parts):
+                continue
+            for part in candidate.parts:
+                function_call = getattr(part, "function_call", None)
+                if function_call and function_call.id == tool_call_id:
+                    part.thought_signature = thought_signature
+                    # Only the first match (searching backwards) is signed.
+                    return
+
+    def _apply_non_function_thought_signatures_to_messages(
+        self, thought_signatures: List[dict], messages: List[Content]
+    ) -> None:
+        """Apply non-function-call thought signatures to assistant messages.
+
+        These signatures are optional, but recommended. Gemini 3 Pro (and,
+        somewhat surprisingly, other models, too, when functions are involved
+        in the conversation) outputs thought signatures at the end of assistant
+        responses.
+
+        Each signature is written to the last part of the first model message
+        (among those without function calls) whose last part matches the
+        signature's bookmark. Matching preserves order: once a signature has
+        been applied, later signatures are only compared against later
+        messages. Entries without a signature or without a bookmark are
+        skipped, and text that is empty after whitespace normalization never
+        matches.
+
+        Args:
+            thought_signatures: A list of dicts containing:
+                - "signature": a thought signature
+                - "bookmark": a bookmark to identify the message part to apply the signature to.
+                  The bookmark may contain either:
+                    - "text"
+                    - "inline_data"
+            messages: List of messages to search through.
+        """
+        if not thought_signatures:
+            return
+
+        # For debugging, print out thought signatures and their bookmarks
+        logger.trace(f"Thought signatures to apply: {len(thought_signatures)}")
+        for ts in thought_signatures:
+            # A missing bookmark must not make the debug output raise.
+            bookmark = ts.get("bookmark") or {}
+            if text := bookmark.get("text"):
+                log_display_text = f"{text[:50]}..." if len(text) > 50 else text
+                logger.trace(f" - At text: {log_display_text}")
+            elif bookmark.get("inline_data"):
+                logger.trace(f" - At inline data")
+
+        # Find all assistant (model) messages that aren't function calls
+        non_fn_assistant_messages = [
+            message
+            for message in messages
+            if isinstance(message, Content)
+            and message.parts
+            and message.role == "model"
+            and not any(getattr(part, "function_call", None) for part in message.parts)
+        ]
+
+        # Apply thought signatures to the corresponding assistant messages
+        # Match them using content heuristics, maintaining order (messages without signatures are skipped)
+        message_start_index = 0  # Track where to start searching for the next match
+        for thought_signature_dict in thought_signatures:
+            signature = thought_signature_dict.get("signature")
+            bookmark = thought_signature_dict.get("bookmark")
+            # Nothing to apply without a signature, and nothing to match
+            # against without a bookmark.
+            if not signature or not bookmark:
+                continue
+
+            # A text bookmark takes precedence over an inline_data bookmark.
+            # Whitespace is normalized once per signature rather than once per
+            # candidate message.
+            bookmark_text = bookmark.get("text")
+            signed_text = " ".join(bookmark_text.split()) if bookmark_text else ""
+            inline_data = None if bookmark_text else bookmark.get("inline_data")
+
+            # Search through remaining non-function assistant messages for a match
+            for i in range(message_start_index, len(non_fn_assistant_messages)):
+                last_part = non_fn_assistant_messages[i].parts[-1]
+                matched = False
+
+                if bookmark_text:
+                    part_text = getattr(last_part, "text", None)
+                    last_text = " ".join(part_text.split()) if part_text else ""
+                    # The last part matches when its text equals the bookmark text or
+                    # - is a prefix of that text (in case spoken text was truncated due to interruption)
+                    # - is prefixed by that text (in case bookmark represents just first chunk of multi-chunk text)
+                    # Equal strings are prefixes of each other, so two startswith
+                    # checks cover all three cases. Both sides must be non-empty:
+                    # every string starts with "", so whitespace-only text would
+                    # otherwise match any message.
+                    if (
+                        signed_text
+                        and last_text
+                        and (
+                            signed_text.startswith(last_text)
+                            or last_text.startswith(signed_text)
+                        )
+                    ):
+                        log_display_text = (
+                            f"{part_text[:50]}..." if len(part_text) > 50 else part_text
+                        )
+                        logger.trace(
+                            f"Applying thought signature to part with matching text: {log_display_text}"
+                        )
+                        last_part.thought_signature = signature
+                        matched = True
+
+                # Check if signed part has inline_data and last message part has matching inline_data
+                elif inline_data:
+                    part_inline_data = getattr(last_part, "inline_data", None)
+                    if part_inline_data and part_inline_data.data == inline_data.data:
+                        logger.trace(
+                            f"Applying thought signature to part with matching inline_data"
+                        )
+                        last_part.thought_signature = signature
+                        matched = True
+
+                # If we found a match, update start index and stop searching for this signed part
+                if matched:
+                    message_start_index = i + 1
+                    break
--- a/src/pipecat/audio/filters/aic_filter.py
+++ b/src/pipecat/audio/filters/aic_filter.py
@@ -39,7 +39,7 @@ class AICFilter(BaseAudioFilter):
        self,
        *,
        license_key: str = "",
-        model_type: AICModelType = AICModelType.QUAIL_L,
+        model_type: AICModelType = AICModelType.QUAIL_STT,
        enhancement_level: Optional[float] = 1.0,
        voice_gain: Optional[float] = 1.0,
        noise_gate_enable: Optional[bool] = True,
@@ -52,12 +52,27 @@ class AICFilter(BaseAudioFilter):
            enhancement_level: Optional overall enhancement strength (0.0..1.0).
            voice_gain: Optional linear gain applied to detected speech (0.0..4.0).
            noise_gate_enable: Optional enable/disable noise gate (default: True).
+
+                .. deprecated:: 1.3.0
+                    The `noise_gate_enable` parameter is deprecated and no longer has any effect.
+                    It will be removed in a future version.
        """
        self._license_key = license_key
        self._model_type = model_type

        self._enhancement_level = enhancement_level
        self._voice_gain = voice_gain
+        if noise_gate_enable is not None:
+            import warnings
+
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "Parameter `noise_gate_enable` is deprecated and no longer has any effect. "
+                    "It will be removed in a future version. Use AIC VAD instead (create_vad_analyzer()).",
+                    DeprecationWarning,
+                )
+
        self._noise_gate_enable = noise_gate_enable

        self._enabled = True
@@ -149,10 +164,6 @@ class AICFilter(BaseAudioFilter):
                )
            if self._voice_gain is not None:
                self._aic.set_parameter(AICParameter.VOICE_GAIN, float(self._voice_gain))
-            if self._noise_gate_enable is not None:
-                self._aic.set_parameter(
-                    AICParameter.NOISE_GATE_ENABLE, 1.0 if bool(self._noise_gate_enable) else 0.0
-                )

            self._aic_ready = True

--- a/src/pipecat/audio/turn/smart_turn/base_smart_turn.py
+++ b/src/pipecat/audio/turn/smart_turn/base_smart_turn.py
@@ -28,7 +28,6 @@ from pipecat.metrics.metrics import MetricsData, SmartTurnMetricsData
 STOP_SECS = 3
 PRE_SPEECH_MS = 0
 MAX_DURATION_SECONDS = 8  # Max allowed segment duration
-USE_ONLY_LAST_VAD_SEGMENT = True


 class SmartTurnParams(BaseTurnParams):
@@ -43,8 +42,6 @@ class SmartTurnParams(BaseTurnParams):
    stop_secs: float = STOP_SECS
    pre_speech_ms: float = PRE_SPEECH_MS
    max_duration_secs: float = MAX_DURATION_SECONDS
-    # not exposing this for now yet until the model can handle it.
-    # use_only_last_vad_segment: bool = USE_ONLY_LAST_VAD_SEGMENT


 class SmartTurnTimeoutException(Exception):
@@ -160,7 +157,7 @@ class BaseSmartTurn(BaseTurnAnalyzer):
        state, result = await loop.run_in_executor(
            self._executor, self._process_speech_segment, self._audio_buffer
        )
-        if state == EndOfTurnState.COMPLETE or USE_ONLY_LAST_VAD_SEGMENT:
+        if state == EndOfTurnState.COMPLETE:
            self._clear(state)
        logger.debug(f"End of Turn result: {state}")
        return state, result
--- a/src/pipecat/audio/turn/smart_turn/data/smart-turn-v3.1-cpu.onnx
+++ b/src/pipecat/audio/turn/smart_turn/data/smart-turn-v3.1-cpu.onnx
--- a/src/pipecat/audio/turn/smart_turn/fal_smart_turn.py
+++ b/src/pipecat/audio/turn/smart_turn/fal_smart_turn.py
@@ -14,6 +14,7 @@ Note: To learn more about the smart-turn model, visit:
    - https://github.com/pipecat-ai/smart-turn
 """

+import warnings
 from typing import Optional

 import aiohttp
@@ -26,6 +27,10 @@ class FalSmartTurnAnalyzer(HttpSmartTurnAnalyzer):

    Extends HttpSmartTurnAnalyzer to provide integration with Fal.ai's
    smart turn detection API endpoint with proper authentication.
+
+    .. deprecated:: 0.0.98
+        FalSmartTurnAnalyzer is deprecated and will be removed in a future version.
+        Use LocalSmartTurnAnalyzerV3 instead.
    """

    def __init__(
@@ -48,3 +53,12 @@ class FalSmartTurnAnalyzer(HttpSmartTurnAnalyzer):
        if api_key:
            headers = {"Authorization": f"Key {api_key}"}
        super().__init__(url=url, aiohttp_session=aiohttp_session, headers=headers, **kwargs)
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "FalSmartTurnAnalyzer is deprecated and will be removed in a future version. "
+                "Use LocalSmartTurnAnalyzerV3 instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
--- a/src/pipecat/audio/turn/smart_turn/local_smart_turn.py
+++ b/src/pipecat/audio/turn/smart_turn/local_smart_turn.py
@@ -10,6 +10,7 @@ This module provides a smart turn analyzer that uses PyTorch models for
 local end-of-turn detection without requiring network connectivity.
 """

+import warnings
 from typing import Any, Dict

 import numpy as np
@@ -34,6 +35,10 @@ class LocalSmartTurnAnalyzer(BaseSmartTurn):
    Provides end-of-turn detection using locally-stored PyTorch models,
    enabling offline operation without network dependencies. Uses
    Wav2Vec2-BERT architecture for audio sequence classification.
+
+    .. deprecated:: 0.0.98
+        LocalSmartTurnAnalyzer is deprecated and will be removed in a future version.
+        Use LocalSmartTurnAnalyzerV3 instead.
    """

    def __init__(self, *, smart_turn_model_path: str, **kwargs):
@@ -46,6 +51,15 @@ class LocalSmartTurnAnalyzer(BaseSmartTurn):
        """
        super().__init__(**kwargs)

+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "LocalSmartTurnAnalyzer is deprecated and will be removed in a future version. "
+                "Use LocalSmartTurnAnalyzerV3 instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+
        if not smart_turn_model_path:
            # Define the path to the pretrained model on Hugging Face
            smart_turn_model_path = "pipecat-ai/smart-turn"
--- a/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py
+++ b/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py
@@ -42,17 +42,15 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn):

        Args:
            smart_turn_model_path: Path to the ONNX model file. If this is not
-                set, the bundled smart-turn-v3.0 model will be used.
+                set, the bundled smart-turn-v3.1-cpu model will be used.
            cpu_count: The number of CPUs to use for inference. Defaults to 1.
            **kwargs: Additional arguments passed to BaseSmartTurn.
        """
        super().__init__(**kwargs)

-        logger.debug("Loading Local Smart Turn v3 model...")
-
        if not smart_turn_model_path:
            # Load bundled model
-            model_name = "smart-turn-v3.0.onnx"
+            model_name = "smart-turn-v3.1-cpu.onnx"
            package_path = "pipecat.audio.turn.smart_turn.data"

            try:
@@ -70,6 +68,8 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn):
                        impresources.files(package_path).joinpath(model_name)
                    )

+        logger.debug(f"Loading Local Smart Turn v3.x model from {smart_turn_model_path}...")
+
        so = ort.SessionOptions()
        so.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
        so.inter_op_num_threads = 1
@@ -79,7 +79,7 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn):
        self._feature_extractor = WhisperFeatureExtractor(chunk_length=8)
        self._session = ort.InferenceSession(smart_turn_model_path, sess_options=so)

-        logger.debug("Loaded Local Smart Turn v3")
+        logger.debug("Loaded Local Smart Turn v3.x")

    def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]:
        """Predict end-of-turn using local ONNX model."""
--- a/src/pipecat/extensions/ivr/ivr_navigator.py
+++ b/src/pipecat/extensions/ivr/ivr_navigator.py
@@ -18,8 +18,10 @@ from loguru import logger
 from pipecat.audio.dtmf.types import KeypadEntry
 from pipecat.audio.vad.vad_analyzer import VADParams
 from pipecat.frames.frames import (
+    EndFrame,
    Frame,
    LLMContextFrame,
+    LLMFullResponseEndFrame,
    LLMMessagesUpdateFrame,
    LLMTextFrame,
    OutputDTMFUrgentFrame,
@@ -149,11 +151,18 @@ class IVRProcessor(FrameProcessor):

        elif isinstance(frame, LLMTextFrame):
            # Process text through the pattern aggregator
-            result = await self._aggregator.aggregate(frame.text)
-            if result:
+            async for result in self._aggregator.aggregate(frame.text):
                # Push aggregated text that doesn't contain XML patterns
                await self.push_frame(LLMTextFrame(result.text), direction)

+        elif isinstance(frame, (LLMFullResponseEndFrame, EndFrame)):
+            # Flush any remaining text from the aggregator
+            remaining = await self._aggregator.flush()
+            if remaining:
+                await self.push_frame(LLMTextFrame(remaining.text), direction)
+            # Push the end frame
+            await self.push_frame(frame, direction)
+
        else:
            await self.push_frame(frame, direction)

--- a/src/pipecat/extensions/voicemail/voicemail_detector.py
+++ b/src/pipecat/extensions/voicemail/voicemail_detector.py
@@ -40,8 +40,8 @@ from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor, FrameProcessorSetup
 from pipecat.services.llm_service import LLMService
-from pipecat.sync.base_notifier import BaseNotifier
-from pipecat.sync.event_notifier import EventNotifier
+from pipecat.utils.sync.base_notifier import BaseNotifier
+from pipecat.utils.sync.event_notifier import EventNotifier


 class NotifierGate(FrameProcessor):
@@ -252,7 +252,8 @@ class ClassificationProcessor(FrameProcessor):
        self._voicemail_notifier = voicemail_notifier
        self._voicemail_response_delay = voicemail_response_delay

-        # Register the voicemail detected event
+        # Register the conversation and voicemail detected events
+        self._register_event_handler("on_conversation_detected")
        self._register_event_handler("on_voicemail_detected")

        # Aggregation state for collecting complete LLM responses
@@ -350,6 +351,7 @@ class ClassificationProcessor(FrameProcessor):
            logger.info(f"{self}: CONVERSATION detected")
            await self._gate_notifier.notify()  # Close the classifier gate
            await self._conversation_notifier.notify()  # Release buffered TTS frames
+            await self._call_event_handler("on_conversation_detected")

        elif "VOICEMAIL" in response:
            # Voicemail detected - trigger voicemail handling
@@ -539,6 +541,9 @@ class VoicemailDetector(ParallelPipeline):
        custom_prompt = "Your custom classification logic here. " + VoicemailDetector.CLASSIFIER_RESPONSE_INSTRUCTION

    Events:
+        on_conversation_detected: Triggered when a human conversation is detected. The
+            event handler receives one argument: the ClassificationProcessor instance
+            which can be used to push frames.
        on_voicemail_detected: Triggered when voicemail is detected after the configured
            delay. The event handler receives one argument: the ClassificationProcessor
            instance which can be used to push frames.
@@ -701,7 +706,7 @@ VOICEMAIL SYSTEM (respond "VOICEMAIL"):
            event_name: The name of the event to handle.
            handler: The function to call when the event occurs.
        """
-        if event_name == "on_voicemail_detected":
+        if event_name in ("on_conversation_detected", "on_voicemail_detected"):
            self._classification_processor.add_event_handler(event_name, handler)
        else:
            super().add_event_handler(event_name, handler)
--- a/src/pipecat/frames/frames.py
+++ b/src/pipecat/frames/frames.py
@@ -38,7 +38,7 @@ from pipecat.utils.time import nanoseconds_to_str
 from pipecat.utils.utils import obj_count, obj_id

 if TYPE_CHECKING:
-    from pipecat.processors.aggregators.llm_context import LLMContext, NotGiven
+    from pipecat.processors.aggregators.llm_context import LLMContext, LLMContextMessage, NotGiven
    from pipecat.processors.frame_processor import FrameProcessor


@@ -186,6 +186,20 @@ class ControlFrame(Frame):
 #


+@dataclass
+class UninterruptibleFrame:
+    """A marker for data or control frames that must not be interrupted.
+
+    Frames with this mixin are still ordered normally, but unlike other frames,
+    they are preserved during interruptions: they remain in internal queues and
+    any task processing them will not be cancelled. This ensures the frame is
+    always delivered and processed to completion.
+
+    """
+
+    pass
+
+
 @dataclass
 class AudioRawFrame:
    """A frame containing a chunk of raw audio.
@@ -330,7 +344,7 @@ class TextFrame(DataFrame):
    """

    text: str
-    skip_tts: bool = field(init=False)
+    skip_tts: Optional[bool] = field(init=False)
    # Whether any necessary inter-frame (leading/trailing) spaces are already
    # included in the text.
    # NOTE: Ideally this would be available at init time with a default value,
@@ -343,7 +357,7 @@ class TextFrame(DataFrame):

    def __post_init__(self):
        super().__post_init__()
-        self.skip_tts = False
+        self.skip_tts = None
        self.includes_inter_frame_spaces = False
        self.append_to_context = True

@@ -498,6 +512,15 @@ class TranscriptionMessage:
    timestamp: Optional[str] = None


+@dataclass
+class ThoughtTranscriptionMessage:
+    """An LLM thought message in a conversation transcript."""
+
+    role: Literal["assistant"] = field(default="assistant", init=False)
+    content: str
+    timestamp: Optional[str] = None
+
+
 @dataclass
 class TranscriptionUpdateFrame(DataFrame):
    """Frame containing new messages added to conversation transcript.
@@ -542,7 +565,7 @@ class TranscriptionUpdateFrame(DataFrame):
        messages: List of new transcript messages that were added.
    """

-    messages: List[TranscriptionMessage]
+    messages: List[TranscriptionMessage | ThoughtTranscriptionMessage]

    def __str__(self):
        pts = format_pts(self.pts)
@@ -563,6 +586,75 @@ class LLMContextFrame(Frame):
    context: "LLMContext"


+@dataclass
+class LLMThoughtStartFrame(ControlFrame):
+    """Frame indicating the start of an LLM thought.
+
+    Parameters:
+        append_to_context: Whether the thought should be appended to the LLM context.
+            If it is appended, the `llm` field is required, since it will be
+            appended as an `LLMSpecificMessage`.
+        llm: Optional identifier of the LLM provider for LLM-specific handling.
+            Only required if `append_to_context` is True, as the thought is
+            appended to context as an `LLMSpecificMessage`.
+    """
+
+    append_to_context: bool = False
+    llm: Optional[str] = None
+
+    def __post_init__(self):
+        super().__post_init__()
+        if self.append_to_context and self.llm is None:
+            raise ValueError("When append_to_context is True, llm must be set")
+
+    def __str__(self):
+        pts = format_pts(self.pts)
+        return (
+            f"{self.name}(pts: {pts}, append_to_context: {self.append_to_context}, llm: {self.llm})"
+        )
+
+
+@dataclass
+class LLMThoughtTextFrame(DataFrame):
+    """Frame containing the text (or text chunk) of an LLM thought.
+
+    Note that despite this containing text, it is a DataFrame and not a
+    TextFrame, to avoid most typical text processing, such as TTS.
+
+    Parameters:
+        text: The text (or text chunk) of the thought.
+    """
+
+    text: str
+    includes_inter_frame_spaces: bool = field(init=False)
+
+    def __post_init__(self):
+        super().__post_init__()
+        # Assume that thought text chunks include all necessary spaces
+        self.includes_inter_frame_spaces = True
+
+    def __str__(self):
+        pts = format_pts(self.pts)
+        return f"{self.name}(pts: {pts}, thought text: {self.text})"
+
+
+@dataclass
+class LLMThoughtEndFrame(ControlFrame):
+    """Frame indicating the end of an LLM thought.
+
+    Parameters:
+        signature: Optional signature associated with the thought.
+            This is used by Anthropic, which includes a signature at the end of
+            each thought.
+    """
+
+    signature: Any = None
+
+    def __str__(self):
+        pts = format_pts(self.pts)
+        return f"{self.name}(pts: {pts}, signature: {self.signature})"
+
+
 @dataclass
 class LLMMessagesFrame(DataFrame):
    """Frame containing LLM messages for chat completion.
@@ -696,6 +788,44 @@ class LLMConfigureOutputFrame(DataFrame):
    skip_tts: bool


+@dataclass
+class FunctionCallResultProperties:
+    """Properties for configuring function call result behavior.
+
+    Parameters:
+        run_llm: Whether to run the LLM after receiving this result.
+        on_context_updated: Callback to execute when context is updated.
+    """
+
+    run_llm: Optional[bool] = None
+    on_context_updated: Optional[Callable[[], Awaitable[None]]] = None
+
+
+@dataclass
+class FunctionCallResultFrame(DataFrame, UninterruptibleFrame):
+    """Frame containing the result of an LLM function call.
+
+    This is an uninterruptible frame because once a result is generated we
+    always want to update the context.
+
+    Parameters:
+        function_name: Name of the function that was executed.
+        tool_call_id: Unique identifier for the function call.
+        arguments: Arguments that were passed to the function.
+        result: The result returned by the function.
+        run_llm: Whether to run the LLM after this result.
+        properties: Additional properties for result handling.
+
+    """
+
+    function_name: str
+    tool_call_id: str
+    arguments: Any
+    result: Any
+    run_llm: Optional[bool] = None
+    properties: Optional[FunctionCallResultProperties] = None
+
+
 @dataclass
 class TTSSpeakFrame(DataFrame):
    """Frame containing text that should be spoken by TTS.
@@ -835,11 +965,13 @@ class ErrorFrame(SystemFrame):
        error: Description of the error that occurred.
        fatal: Whether the error is fatal and requires bot shutdown.
        processor: The frame processor that generated the error.
+        exception: The exception that occurred.
    """

    error: str
    fatal: bool = False
    processor: Optional["FrameProcessor"] = None
+    exception: Optional[Exception] = None

    def __str__(self):
        return f"{self.name}(error: {self.error}, fatal: {self.fatal})"
@@ -1065,12 +1197,16 @@ class FunctionCallFromLLM:
        tool_call_id: A unique identifier for the function call.
        arguments: The arguments to pass to the function.
        context: The LLM context when the function call was made.
+        append_extra_context_messages: Optional extra messages to append to the
+            context after the function call message. Used to add Google
+            function-call-related thought signatures to the context.
    """

    function_name: str
    tool_call_id: str
    arguments: Mapping[str, Any]
    context: Any
+    append_extra_context_messages: Optional[List["LLMContextMessage"]] = None


 @dataclass
@@ -1087,23 +1223,6 @@ class FunctionCallsStartedFrame(SystemFrame):
    function_calls: Sequence[FunctionCallFromLLM]


-@dataclass
-class FunctionCallInProgressFrame(SystemFrame):
-    """Frame signaling that a function call is currently executing.
-
-    Parameters:
-        function_name: Name of the function being executed.
-        tool_call_id: Unique identifier for this function call.
-        arguments: Arguments passed to the function.
-        cancel_on_interruption: Whether to cancel this call if interrupted.
-    """
-
-    function_name: str
-    tool_call_id: str
-    arguments: Any
-    cancel_on_interruption: bool = False
-
-
 @dataclass
 class FunctionCallCancelFrame(SystemFrame):
    """Frame signaling that a function call has been cancelled.
@@ -1117,40 +1236,6 @@ class FunctionCallCancelFrame(SystemFrame):
    tool_call_id: str


-@dataclass
-class FunctionCallResultProperties:
-    """Properties for configuring function call result behavior.
-
-    Parameters:
-        run_llm: Whether to run the LLM after receiving this result.
-        on_context_updated: Callback to execute when context is updated.
-    """
-
-    run_llm: Optional[bool] = None
-    on_context_updated: Optional[Callable[[], Awaitable[None]]] = None
-
-
-@dataclass
-class FunctionCallResultFrame(SystemFrame):
-    """Frame containing the result of an LLM function call.
-
-    Parameters:
-        function_name: Name of the function that was executed.
-        tool_call_id: Unique identifier for the function call.
-        arguments: Arguments that were passed to the function.
-        result: The result returned by the function.
-        run_llm: Whether to run the LLM after this result.
-        properties: Additional properties for result handling.
-    """
-
-    function_name: str
-    tool_call_id: str
-    arguments: Any
-    result: Any
-    run_llm: Optional[bool] = None
-    properties: Optional[FunctionCallResultProperties] = None
-
-
 @dataclass
 class STTMuteFrame(SystemFrame):
    """Frame to mute/unmute the Speech-to-Text service.
@@ -1630,22 +1715,46 @@ class LLMFullResponseStartFrame(ControlFrame):
    more TextFrames and a final LLMFullResponseEndFrame.
    """

-    skip_tts: bool = field(init=False)
+    skip_tts: Optional[bool] = field(init=False)

    def __post_init__(self):
        super().__post_init__()
-        self.skip_tts = False
+        self.skip_tts = None


 @dataclass
 class LLMFullResponseEndFrame(ControlFrame):
    """Frame indicating the end of an LLM response."""

-    skip_tts: bool = field(init=False)
+    skip_tts: Optional[bool] = field(init=False)

    def __post_init__(self):
        super().__post_init__()
-        self.skip_tts = False
+        self.skip_tts = None
+
+
+@dataclass
+class FunctionCallInProgressFrame(ControlFrame, UninterruptibleFrame):
+    """Frame signaling that a function call is currently executing.
+
+    This is an uninterruptible frame because we always want to update the
+    context.
+
+    Parameters:
+        function_name: Name of the function being executed.
+        tool_call_id: Unique identifier for this function call.
+        arguments: Arguments passed to the function.
+        cancel_on_interruption: Whether to cancel this call if interrupted.
+        append_extra_context_messages: Optional extra messages to append to the
+            context after the function call message. Used to add Google
+            function-call-related thought signatures to the context.
+    """
+
+    function_name: str
+    tool_call_id: str
+    arguments: Any
+    cancel_on_interruption: bool = False
+    append_extra_context_messages: Optional[List["LLMContextMessage"]] = None


 @dataclass
--- a/src/pipecat/observers/loggers/user_bot_latency_log_observer.py
+++ b/src/pipecat/observers/loggers/user_bot_latency_log_observer.py
@@ -15,8 +15,8 @@ from pipecat.frames.frames import (
    BotStartedSpeakingFrame,
    CancelFrame,
    EndFrame,
-    UserStartedSpeakingFrame,
-    UserStoppedSpeakingFrame,
+    VADUserStartedSpeakingFrame,
+    VADUserStoppedSpeakingFrame,
 )
 from pipecat.observers.base_observer import BaseObserver, FramePushed
 from pipecat.processors.frame_processor import FrameDirection
@@ -36,7 +36,7 @@ class UserBotLatencyLogObserver(BaseObserver):
        to calculate response latencies.
        """
        super().__init__()
-        self._processed_frames = set()
+        self._user_bot_latency_processed_frames = set()
        self._user_stopped_time = 0
        self._latencies = []

@@ -51,14 +51,14 @@ class UserBotLatencyLogObserver(BaseObserver):
            return

        # Skip already processed frames
-        if data.frame.id in self._processed_frames:
+        if data.frame.id in self._user_bot_latency_processed_frames:
            return

-        self._processed_frames.add(data.frame.id)
+        self._user_bot_latency_processed_frames.add(data.frame.id)

-        if isinstance(data.frame, UserStartedSpeakingFrame):
+        if isinstance(data.frame, VADUserStartedSpeakingFrame):
            self._user_stopped_time = 0
-        elif isinstance(data.frame, UserStoppedSpeakingFrame):
+        elif isinstance(data.frame, VADUserStoppedSpeakingFrame):
            self._user_stopped_time = time.time()
        elif isinstance(data.frame, (EndFrame, CancelFrame)):
            self._log_summary()
--- a/src/pipecat/processors/aggregators/gated_llm_context.py
+++ b/src/pipecat/processors/aggregators/gated_llm_context.py
@@ -9,7 +9,7 @@
 from pipecat.frames.frames import CancelFrame, EndFrame, Frame, LLMContextFrame, StartFrame
 from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContextFrame
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
-from pipecat.sync.base_notifier import BaseNotifier
+from pipecat.utils.sync.base_notifier import BaseNotifier


 class GatedLLMContextAggregator(FrameProcessor):
--- a/src/pipecat/processors/aggregators/llm_response_universal.py
+++ b/src/pipecat/processors/aggregators/llm_response_universal.py
@@ -47,6 +47,9 @@ from pipecat.frames.frames import (
    LLMRunFrame,
    LLMSetToolChoiceFrame,
    LLMSetToolsFrame,
+    LLMThoughtEndFrame,
+    LLMThoughtStartFrame,
+    LLMThoughtTextFrame,
    SpeechControlParamsFrame,
    StartFrame,
    TextFrame,
@@ -592,6 +595,10 @@ class LLMAssistantAggregator(LLMContextAggregator):
        self._function_calls_in_progress: Dict[str, Optional[FunctionCallInProgressFrame]] = {}
        self._context_updated_tasks: Set[asyncio.Task] = set()

+        self._thought_aggregation_enabled = False
+        self._thought_llm: str = ""
+        self._thought_aggregation: List[TextPartForConcatenation] = []
+
    @property
    def has_function_calls_in_progress(self) -> bool:
        """Check if there are any function calls currently in progress.
@@ -601,6 +608,17 @@ class LLMAssistantAggregator(LLMContextAggregator):
        """
        return bool(self._function_calls_in_progress)

+    async def reset(self):
+        """Reset the aggregation state."""
+        await super().reset()
+        await self._reset_thought_aggregation()  # Just to be safe
+
+    async def _reset_thought_aggregation(self):
+        """Reset the thought aggregation state."""
+        self._thought_aggregation_enabled = False
+        self._thought_llm = ""
+        self._thought_aggregation = []
+
    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process frames for assistant response aggregation and function call management.

@@ -619,6 +637,12 @@ class LLMAssistantAggregator(LLMContextAggregator):
            await self._handle_llm_end(frame)
        elif isinstance(frame, TextFrame):
            await self._handle_text(frame)
+        elif isinstance(frame, LLMThoughtStartFrame):
+            await self._handle_thought_start(frame)
+        elif isinstance(frame, LLMThoughtTextFrame):
+            await self._handle_thought_text(frame)
+        elif isinstance(frame, LLMThoughtEndFrame):
+            await self._handle_thought_end(frame)
        elif isinstance(frame, LLMRunFrame):
            await self._handle_llm_run(frame)
        elif isinstance(frame, LLMMessagesAppendFrame):
@@ -716,6 +740,10 @@ class LLMAssistantAggregator(LLMContextAggregator):
            }
        )

+        # Append to context any specified extra context messages
+        if frame.append_extra_context_messages:
+            self._context.add_messages(frame.append_extra_context_messages)
+
        self._function_calls_in_progress[frame.tool_call_id] = frame

    async def _handle_function_call_result(self, frame: FunctionCallResultFrame):
@@ -824,6 +852,47 @@ class LLMAssistantAggregator(LLMContextAggregator):
            )
        )

+    async def _handle_thought_start(self, frame: LLMThoughtStartFrame):
+        if not self._started:
+            return
+
+        await self._reset_thought_aggregation()
+        self._thought_aggregation_enabled = frame.append_to_context
+        self._thought_llm = frame.llm
+
+    async def _handle_thought_text(self, frame: LLMThoughtTextFrame):
+        if not self._started or not self._thought_aggregation_enabled:
+            return
+
+        # Make sure we really have text (spaces count, too!)
+        if len(frame.text) == 0:
+            return
+
+        self._thought_aggregation.append(
+            TextPartForConcatenation(
+                frame.text, includes_inter_part_spaces=frame.includes_inter_frame_spaces
+            )
+        )
+
+    async def _handle_thought_end(self, frame: LLMThoughtEndFrame):
+        if not self._started or not self._thought_aggregation_enabled:
+            return
+
+        thought = concatenate_aggregated_text(self._thought_aggregation)
+        llm = self._thought_llm
+        await self._reset_thought_aggregation()
+
+        self._context.add_message(
+            LLMSpecificMessage(
+                llm=llm,
+                message={
+                    "type": "thought",
+                    "text": thought,
+                    "signature": frame.signature,
+                },
+            )
+        )
+
    def _context_updated_task_finished(self, task: asyncio.Task):
        self._context_updated_tasks.discard(task)

--- a/src/pipecat/processors/aggregators/llm_text_processor.py
+++ b/src/pipecat/processors/aggregators/llm_text_processor.py
@@ -83,8 +83,7 @@ class LLMTextProcessor(FrameProcessor):
        await self._text_aggregator.reset()

    async def _handle_llm_text(self, in_frame: LLMTextFrame):
-        aggregation = await self._text_aggregator.aggregate(in_frame.text)
-        if aggregation:
+        async for aggregation in self._text_aggregator.aggregate(in_frame.text):
            out_frame = AggregatedTextFrame(
                text=aggregation.text,
                aggregated_by=aggregation.type,
@@ -92,15 +91,13 @@ class LLMTextProcessor(FrameProcessor):
            out_frame.skip_tts = in_frame.skip_tts
            await self.push_frame(out_frame)

-    async def _handle_llm_end(self, skip_tts: bool = False):
-        # Flush any remaining aggregated text at the end of the LLM response
-        aggregation = self._text_aggregator.text
-        await self._text_aggregator.reset()
-        text = aggregation.text.strip()
-        if text:
+    async def _handle_llm_end(self, skip_tts: Optional[bool] = None):
+        # Flush any remaining text
+        remaining = await self._text_aggregator.flush()
+        if remaining:
            out_frame = AggregatedTextFrame(
-                text=text,
-                aggregated_by=aggregation.type,
+                text=remaining.text,
+                aggregated_by=remaining.type,
            )
            out_frame.skip_tts = skip_tts
            await self.push_frame(out_frame)
--- a/src/pipecat/processors/filters/wake_check_filter.py
+++ b/src/pipecat/processors/filters/wake_check_filter.py
@@ -126,6 +126,4 @@ class WakeCheckFilter(FrameProcessor):
            else:
                await self.push_frame(frame, direction)
        except Exception as e:
-            error_msg = f"Error in wake word filter: {e}"
-            logger.exception(error_msg)
-            await self.push_error(ErrorFrame(error_msg))
+            await self.push_error(error_msg=f"Error in wake word filter: {e}", exception=e)
--- a/src/pipecat/processors/filters/wake_notifier_filter.py
+++ b/src/pipecat/processors/filters/wake_notifier_filter.py
@@ -10,7 +10,7 @@ from typing import Awaitable, Callable, Tuple, Type

 from pipecat.frames.frames import Frame
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
-from pipecat.sync.base_notifier import BaseNotifier
+from pipecat.utils.sync.base_notifier import BaseNotifier


 class WakeNotifierFilter(FrameProcessor):
--- a/src/pipecat/processors/frame_processor.py
+++ b/src/pipecat/processors/frame_processor.py
@@ -12,6 +12,7 @@ management, and frame flow control mechanisms.
 """

 import asyncio
+import traceback
 from dataclasses import dataclass
 from enum import Enum
 from typing import Any, Awaitable, Callable, Coroutine, List, Optional, Sequence, Tuple, Type
@@ -32,6 +33,7 @@ from pipecat.frames.frames import (
    InterruptionTaskFrame,
    StartFrame,
    SystemFrame,
+    UninterruptibleFrame,
 )
 from pipecat.metrics.metrics import LLMTokenUsage, MetricsData
 from pipecat.observers.base_observer import BaseObserver, FrameProcessed, FramePushed
@@ -142,6 +144,7 @@ class FrameProcessor(BaseObject):
    - on_after_process_frame: Called after a frame is processed
    - on_before_push_frame: Called before a frame is pushed
    - on_after_push_frame: Called after a frame is pushed
+    - on_error: Called when an error is raised in the frame processing.
    """

    def __init__(
@@ -209,6 +212,7 @@ class FrameProcessor(BaseObject):
        # The input task that handles all types of frames. It processes system
        # frames right away and queues non-system frames for later processing.
        self.__should_block_system_frames = False
+        self.__input_queue = FrameProcessorQueue()
        self.__input_event: Optional[asyncio.Event] = None
        self.__input_frame_task: Optional[asyncio.Task] = None

@@ -218,8 +222,10 @@ class FrameProcessor(BaseObject):
        # called. To resume processing frames we need to call
        # `resume_processing_frames()` which will wake up the event.
        self.__should_block_frames = False
+        self.__process_queue = asyncio.Queue()
        self.__process_event: Optional[asyncio.Event] = None
        self.__process_frame_task: Optional[asyncio.Task] = None
+        self.__process_current_frame: Optional[Frame] = None

        # To interrupt a pipeline, we push an `InterruptionTaskFrame` upstream.
        # Then we wait for the corresponding `InterruptionFrame` to travel from
@@ -234,6 +240,7 @@ class FrameProcessor(BaseObject):
        self._register_event_handler("on_after_process_frame", sync=True)
        self._register_event_handler("on_before_push_frame", sync=True)
        self._register_event_handler("on_after_push_frame", sync=True)
+        self._register_event_handler("on_error", sync=True)

    @property
    def id(self) -> int:
@@ -630,7 +637,43 @@ class FrameProcessor(BaseObject):
        elif isinstance(frame, (FrameProcessorResumeFrame, FrameProcessorResumeUrgentFrame)):
            await self.__resume(frame)

-    async def push_error(self, error: ErrorFrame):
+    async def push_error(
+        self,
+        error_msg: str,
+        exception: Optional[Exception] = None,
+        fatal: bool = False,
+    ):
+        """Creates and pushes an ErrorFrame upstream.
+
+        Creates and pushes an ErrorFrame upstream to notify other processors in the
+        pipeline about an error condition. The error frame will include context about
+        which processor generated the error.
+
+        Args:
+            error_msg: Descriptive message explaining the error condition.
+            exception: Optional exception object that caused the error, if available.
+                This provides additional context for debugging and error handling.
+            fatal: Whether this error should be considered fatal to the pipeline.
+                Fatal errors typically cause the entire pipeline to stop processing.
+                Defaults to False for non-fatal errors.
+
+        Example::
+
+            ```python
+            # Non-fatal error
+            await self.push_error("Failed to process audio chunk, skipping")
+
+            # Fatal error with exception context
+            try:
+                result = some_critical_operation()
+            except Exception as e:
+                await self.push_error("Critical operation failed", exception=e, fatal=True)
+            ```
+        """
+        error_frame = ErrorFrame(error=error_msg, fatal=fatal, exception=exception, processor=self)
+        await self.push_error_frame(error=error_frame)
+
+    async def push_error_frame(self, error: ErrorFrame):
        """Push an error frame upstream.

        Args:
@@ -638,6 +681,18 @@ class FrameProcessor(BaseObject):
        """
        if not error.processor:
            error.processor = self
+        await self._call_event_handler("on_error", error)
+
+        tb = traceback.extract_tb(error.exception.__traceback__) if error.exception else None
+        if tb:  # Empty when the exception was never raised and so carries no traceback.
+            last = tb[-1]
+            error_message = (
+                f"{error.processor} exception ({last.filename}:{last.lineno}): {error.error}"
+            )
+        else:
+            error_message = f"{error.processor} error: {error.error}"
+
+        logger.error(error_message)
        await self.push_frame(error, FrameDirection.UPSTREAM)

    async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM):
@@ -754,13 +809,19 @@ class FrameProcessor(BaseObject):
                # interruption). Instead we just drain the queue because this is
                # an interruption.
                self.__reset_process_task()
+            elif isinstance(self.__process_current_frame, UninterruptibleFrame):
+                # We don't want to cancel UninterruptibleFrame, so we simply
+                # cleanup the queue.
+                self.__reset_process_queue()
            else:
-                # Cancel and re-create the process task including the queue.
+                # Cancel and re-create the process task.
                await self.__cancel_process_task()
                self.__create_process_task()
        except Exception as e:
-            logger.exception(f"Uncaught exception in {self} when handling _start_interruption: {e}")
-            await self.push_error(ErrorFrame(str(e)))
+            await self.push_error(
+                error_msg=f"Uncaught exception handling _start_interruption: {e}",
+                exception=e,
+            )

    async def __internal_push_frame(self, frame: Frame, direction: FrameDirection):
        """Internal method to push frames to adjacent processors.
@@ -797,8 +858,7 @@ class FrameProcessor(BaseObject):
                    await self._observer.on_push_frame(data)
                await self._prev.queue_frame(frame, direction)
        except Exception as e:
-            logger.exception(f"Uncaught exception in {self}: {e}")
-            await self.push_error(ErrorFrame(str(e)))
+            await self.push_error(error_msg=f"Uncaught exception: {e}", exception=e)

    def _check_started(self, frame: Frame):
        """Check if the processor has been started.
@@ -820,7 +880,6 @@ class FrameProcessor(BaseObject):

        if not self.__input_frame_task:
            self.__input_event = asyncio.Event()
-            self.__input_queue = FrameProcessorQueue()
            self.__input_frame_task = self.create_task(self.__input_frame_task_handler())

    async def __cancel_input_task(self):
@@ -838,9 +897,7 @@ class FrameProcessor(BaseObject):
            return

        if not self.__process_frame_task:
-            self.__should_block_frames = False
-            self.__process_event = asyncio.Event()
-            self.__process_queue = asyncio.Queue()
+            self.__reset_process_task()
            self.__process_frame_task = self.create_task(self.__process_frame_task_handler())

    def __reset_process_task(self):
@@ -850,10 +907,26 @@ class FrameProcessor(BaseObject):

        self.__should_block_frames = False
        self.__process_event = asyncio.Event()
+        self.__reset_process_queue()
+
+    def __reset_process_queue(self):
+        """Drop queued non-system frames, keeping only UninterruptibleFrame entries."""
+        # Temporary queue holding the (frame, direction, callback) entries we keep.
+        new_queue = asyncio.Queue()
+
+        # Drain the current queue; entries are tuples, so inspect the frame itself.
        while not self.__process_queue.empty():
-            self.__process_queue.get_nowait()
+            item = self.__process_queue.get_nowait()
+            if isinstance(item[0], UninterruptibleFrame):
+                new_queue.put_nowait(item)
            self.__process_queue.task_done()

+        # Put back UninterruptibleFrame frames into our process queue.
+        while not new_queue.empty():
+            item = new_queue.get_nowait()
+            self.__process_queue.put_nowait(item)
+            new_queue.task_done()
+
    async def __cancel_process_task(self):
        """Cancel the non-system frame processing task."""
        if self.__process_frame_task:
@@ -874,8 +947,7 @@ class FrameProcessor(BaseObject):

            await self._call_event_handler("on_after_process_frame", frame)
        except Exception as e:
-            logger.exception(f"{self}: error processing frame: {e}")
-            await self.push_error(ErrorFrame(str(e)))
+            await self.push_error(error_msg=f"Error processing frame: {e}", exception=e)

    async def __input_frame_task_handler(self):
        """Handle frames from the input queue.
@@ -908,8 +980,12 @@ class FrameProcessor(BaseObject):
    async def __process_frame_task_handler(self):
        """Handle non-system frames from the process queue."""
        while True:
+            self.__process_current_frame = None
+
            (frame, direction, callback) = await self.__process_queue.get()

+            self.__process_current_frame = frame
+
            if self.__should_block_frames and self.__process_event:
                logger.trace(f"{self}: frame processing paused")
                await self.__process_event.wait()
--- a/src/pipecat/processors/frameworks/langchain.py
+++ b/src/pipecat/processors/frameworks/langchain.py
@@ -24,7 +24,7 @@ try:
    from langchain_core.messages import AIMessageChunk
    from langchain_core.runnables import Runnable
 except ModuleNotFoundError as e:
-    logger.exception("In order to use Langchain, you need to `pip install pipecat-ai[langchain]`. ")
+    logger.error("In order to use Langchain, you need to `pip install pipecat-ai[langchain]`. ")
    raise Exception(f"Missing module: {e}")


@@ -113,6 +113,6 @@ class LangchainProcessor(FrameProcessor):
        except GeneratorExit:
            logger.warning(f"{self} generator was closed prematurely")
        except Exception as e:
-            logger.exception(f"{self} an unknown error occurred: {e}")
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
        finally:
            await self.push_frame(LLMFullResponseEndFrame())
--- a/src/pipecat/processors/frameworks/rtvi.py
+++ b/src/pipecat/processors/frameworks/rtvi.py
@@ -935,8 +935,8 @@ class RTVIObserverParams:
        system_logs_enabled: Indicates if system logs should be sent.
        errors_enabled: [Deprecated] Indicates if errors messages should be sent.
        skip_aggregator_types: List of aggregation types to skip sending as tts/output messages.
-          Note: if using this to avoid sending secure information, be sure to also disable
-                bot_llm_enabled to avoid leaking through LLM messages.
+            Note: if using this to avoid sending secure information, be sure to also disable
+            bot_llm_enabled to avoid leaking through LLM messages.
        bot_output_transforms: A list of callables to transform text before just before sending it
            to TTS. Each callable takes the aggregated text and its type, and returns the
            transformed text. To register, provide a list of tuples of
--- a/src/pipecat/processors/frameworks/strands_agents.py
+++ b/src/pipecat/processors/frameworks/strands_agents.py
@@ -23,7 +23,7 @@ try:
    from strands import Agent
    from strands.multiagent.graph import Graph
 except ModuleNotFoundError as e:
-    logger.exception("In order to use Strands Agents, you need to `pip install strands-agents`.")
+    logger.error("In order to use Strands Agents, you need to `pip install strands-agents`.")
    raise Exception(f"Missing module: {e}")


@@ -143,7 +143,7 @@ class StrandsAgentsProcessor(FrameProcessor):
        except GeneratorExit:
            logger.warning(f"{self} generator was closed prematurely")
        except Exception as e:
-            logger.exception(f"{self} an unknown error occurred: {e}")
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
        finally:
            if ttfb_tracking:
                await self.stop_ttfb_metrics()
--- a/src/pipecat/processors/transcript_processor.py
+++ b/src/pipecat/processors/transcript_processor.py
@@ -15,17 +15,19 @@ from typing import List, Optional
 from loguru import logger

 from pipecat.frames.frames import (
-    BotStartedSpeakingFrame,
    BotStoppedSpeakingFrame,
    CancelFrame,
    EndFrame,
    Frame,
    InterruptionFrame,
+    LLMThoughtEndFrame,
+    LLMThoughtStartFrame,
+    LLMThoughtTextFrame,
+    ThoughtTranscriptionMessage,
    TranscriptionFrame,
    TranscriptionMessage,
    TranscriptionUpdateFrame,
    TTSTextFrame,
-    UserStartedSpeakingFrame,
 )
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 from pipecat.utils.string import TextPartForConcatenation, concatenate_aggregated_text
@@ -83,92 +85,98 @@ class UserTranscriptProcessor(BaseTranscriptProcessor):


 class AssistantTranscriptProcessor(BaseTranscriptProcessor):
-    """Processes assistant TTS text frames into timestamped conversation messages.
+    """Processes assistant TTS text frames and LLM thought frames into timestamped messages.

-    This processor aggregates TTS text frames into complete utterances and emits them as
-    transcript messages. Utterances are completed when:
+    This processor aggregates both TTS text frames and LLM thought frames into
+    complete utterances and thoughts, emitting them as transcript messages.

+    An assistant utterance is completed when:
    - The bot stops speaking (BotStoppedSpeakingFrame)
    - The bot is interrupted (InterruptionFrame)
-    - The pipeline ends (EndFrame)
+    - The pipeline ends (EndFrame, CancelFrame)
+
+    A thought is completed when:
+    - The thought ends (LLMThoughtEndFrame)
+    - The bot is interrupted (InterruptionFrame)
+    - The pipeline ends (EndFrame, CancelFrame)
    """

-    def __init__(self, **kwargs):
+    def __init__(self, *, process_thoughts: bool = False, **kwargs):
        """Initialize processor with aggregation state.

        Args:
+            process_thoughts: Whether to process LLM thought frames. Defaults to False.
            **kwargs: Additional arguments passed to parent class.
        """
        super().__init__(**kwargs)
-        self._current_text_parts: List[TextPartForConcatenation] = []
-        self._aggregation_start_time: Optional[str] = None

-    async def _emit_aggregated_text(self):
+        self._process_thoughts = process_thoughts
+        self._current_assistant_text_parts: List[TextPartForConcatenation] = []
+        self._assistant_text_start_time: Optional[str] = None
+
+        self._current_thought_parts: List[TextPartForConcatenation] = []
+        self._thought_start_time: Optional[str] = None
+        self._thought_active = False
+
+    async def _emit_aggregated_assistant_text(self):
        """Aggregates and emits text fragments as a transcript message.

-        This method uses a heuristic to automatically detect whether text fragments
-        contain embedded spacing (spaces at the beginning or end of fragments) or not,
-        and applies the appropriate joining strategy. It handles fragments from different
-        TTS services with different formatting patterns.
-
-        Examples:
-            Fragments with embedded spacing (concatenated)::
-
-                TTSTextFrame: ["Hello"]
-                TTSTextFrame: [" there"]  # Leading space
-                TTSTextFrame: ["!"]
-                TTSTextFrame: [" How"]    # Leading space
-                TTSTextFrame: ["'s"]
-                TTSTextFrame: [" it"]     # Leading space
-
-                Result: "Hello there! How's it"
-
-            Fragments with trailing spaces (concatenated)::
-
-                TTSTextFrame: ["Hel"]
-                TTSTextFrame: ["lo "]     # Trailing space
-                TTSTextFrame: ["to "]     # Trailing space
-                TTSTextFrame: ["you"]
-
-                Result: "Hello to you"
-
-            Word-by-word fragments without spacing (joined with spaces)::
-
-                TTSTextFrame: ["Hello"]
-                TTSTextFrame: ["there"]
-                TTSTextFrame: ["how"]
-                TTSTextFrame: ["are"]
-                TTSTextFrame: ["you"]
-
-                Result: "Hello there how are you"
+        This method aggregates text fragments that may arrive in multiple
+        TTSTextFrame instances and emits them as a single TranscriptionMessage.
        """
-        if self._current_text_parts and self._aggregation_start_time:
-            content = concatenate_aggregated_text(self._current_text_parts)
+        if self._current_assistant_text_parts and self._assistant_text_start_time:
+            content = concatenate_aggregated_text(self._current_assistant_text_parts)
            if content:
                logger.trace(f"Emitting aggregated assistant message: {content}")
                message = TranscriptionMessage(
                    role="assistant",
                    content=content,
-                    timestamp=self._aggregation_start_time,
+                    timestamp=self._assistant_text_start_time,
                )
                await self._emit_update([message])
            else:
                logger.trace("No content to emit after stripping whitespace")

            # Reset aggregation state
-            self._current_text_parts = []
-            self._aggregation_start_time = None
+            self._current_assistant_text_parts = []
+            self._assistant_text_start_time = None
+
+    async def _emit_aggregated_thought(self):
+        """Aggregates and emits thought text fragments as a thought transcript message.
+
+        Joins the fragments collected from LLMThoughtTextFrame instances into a single
+        ThoughtTranscriptionMessage, then clears thought state whether or not one was emitted.
+        """
+        if self._current_thought_parts and self._thought_start_time:
+            content = concatenate_aggregated_text(self._current_thought_parts)
+            if content:
+                logger.trace(f"Emitting aggregated thought message: {content}")
+                message = ThoughtTranscriptionMessage(
+                    content=content,
+                    timestamp=self._thought_start_time,
+                )
+                await self._emit_update([message])
+            else:
+                logger.trace("No thought content to emit after stripping whitespace")
+
+        # Always reset, even for an empty thought, so the active flag cannot go stale.
+        self._current_thought_parts = []
+        self._thought_start_time = None
+        self._thought_active = False

    async def process_frame(self, frame: Frame, direction: FrameDirection):
-        """Process frames into assistant conversation messages.
+        """Process frames into assistant conversation messages and thought messages.

        Handles different frame types:

        - TTSTextFrame: Aggregates text for current utterance
+        - LLMThoughtStartFrame: Begins aggregating a new thought
+        - LLMThoughtTextFrame: Aggregates text for current thought
+        - LLMThoughtEndFrame: Completes current thought
        - BotStoppedSpeakingFrame: Completes current utterance
-        - InterruptionFrame: Completes current utterance due to interruption
-        - EndFrame: Completes current utterance at pipeline end
-        - CancelFrame: Completes current utterance due to cancellation
+        - InterruptionFrame: Completes current utterance and thought due to interruption
+        - EndFrame: Completes current utterance and thought at pipeline end
+        - CancelFrame: Completes current utterance and thought due to cancellation

        Args:
            frame: Input frame to process.
@@ -180,14 +188,40 @@ class AssistantTranscriptProcessor(BaseTranscriptProcessor):
            # Push frame first otherwise our emitted transcription update frame
            # might get cleaned up.
            await self.push_frame(frame, direction)
-            # Emit accumulated text with interruptions
-            await self._emit_aggregated_text()
+            # Emit accumulated text and thought with interruptions
+            await self._emit_aggregated_assistant_text()
+            if self._process_thoughts and self._thought_active:
+                await self._emit_aggregated_thought()
+        elif isinstance(frame, LLMThoughtStartFrame):
+            # Start a new thought
+            if self._process_thoughts:
+                self._thought_active = True
+                self._thought_start_time = time_now_iso8601()
+                self._current_thought_parts = []
+            # Push frame.
+            await self.push_frame(frame, direction)
+        elif isinstance(frame, LLMThoughtTextFrame):
+            # Aggregate thought text if we have an active thought
+            if self._process_thoughts and self._thought_active:
+                self._current_thought_parts.append(
+                    TextPartForConcatenation(
+                        frame.text, includes_inter_part_spaces=frame.includes_inter_frame_spaces
+                    )
+                )
+            # Push frame.
+            await self.push_frame(frame, direction)
+        elif isinstance(frame, LLMThoughtEndFrame):
+            # Emit accumulated thought when thought ends
+            if self._process_thoughts and self._thought_active:
+                await self._emit_aggregated_thought()
+            # Push frame.
+            await self.push_frame(frame, direction)
        elif isinstance(frame, TTSTextFrame):
            # Start timestamp on first text part
-            if not self._aggregation_start_time:
-                self._aggregation_start_time = time_now_iso8601()
+            if not self._assistant_text_start_time:
+                self._assistant_text_start_time = time_now_iso8601()

-            self._current_text_parts.append(
+            self._current_assistant_text_parts.append(
                TextPartForConcatenation(
                    frame.text, includes_inter_part_spaces=frame.includes_inter_frame_spaces
                )
@@ -197,7 +231,10 @@ class AssistantTranscriptProcessor(BaseTranscriptProcessor):
            await self.push_frame(frame, direction)
        elif isinstance(frame, (BotStoppedSpeakingFrame, EndFrame)):
            # Emit accumulated text when bot finishes speaking or pipeline ends.
-            await self._emit_aggregated_text()
+            await self._emit_aggregated_assistant_text()
+            # Emit accumulated thought at pipeline end if still active
+            if isinstance(frame, EndFrame) and self._process_thoughts and self._thought_active:
+                await self._emit_aggregated_thought()
            # Push frame.
            await self.push_frame(frame, direction)
        else:
@@ -208,7 +245,8 @@ class TranscriptProcessor:
    """Factory for creating and managing transcript processors.

    Provides unified access to user and assistant transcript processors
-    with shared event handling.
+    with shared event handling. The assistant processor handles both TTS text
+    and LLM thought frames.

    Example::

@@ -223,7 +261,7 @@ class TranscriptProcessor:
                llm,
                tts,
                transport.output(),
-                transcript.assistant_tts(),     # Assistant transcripts
+                transcript.assistant(),         # Assistant transcripts (including thoughts)
                context_aggregator.assistant(),
            ]
        )
@@ -233,8 +271,14 @@ class TranscriptProcessor:
            print(f"New messages: {frame.messages}")
    """

-    def __init__(self):
-        """Initialize factory."""
+    def __init__(self, *, process_thoughts: bool = False):
+        """Initialize factory.
+
+        Args:
+            process_thoughts: Whether the assistant processor should handle LLM thought
+                frames. Defaults to False.
+        """
+        self._process_thoughts = process_thoughts
        self._user_processor = None
        self._assistant_processor = None
        self._event_handlers = {}
@@ -269,7 +313,9 @@ class TranscriptProcessor:
            The assistant transcript processor instance.
        """
        if self._assistant_processor is None:
-            self._assistant_processor = AssistantTranscriptProcessor(**kwargs)
+            self._assistant_processor = AssistantTranscriptProcessor(
+                process_thoughts=self._process_thoughts, **kwargs
+            )
            # Apply any registered event handlers
            for event_name, handler in self._event_handlers.items():

@@ -308,267 +354,3 @@ class TranscriptProcessor:
            return handler

        return decorator
-
-
-class TurnAwareTranscriptProcessor(BaseTranscriptProcessor):
-    """Processes transcripts with turn boundary awareness.
-
-    This processor combines user and assistant transcript tracking with turn
-    detection, emitting events when turns start and end. It correctly handles
-    interruptions by only capturing what was actually spoken.
-
-    Turn boundaries are detected based on:
-    - User started speaking (UserStartedSpeakingFrame)
-    - Bot stopped speaking (BotStoppedSpeakingFrame)
-    - Interruptions (InterruptionFrame)
-
-    Events:
-        on_turn_started: Emitted when a new turn begins.
-            Handler signature: async def handler(processor, turn_number)
-
-        on_turn_ended: Emitted when a turn ends.
-            Handler signature: async def handler(processor, turn_number,
-                                                user_transcript, assistant_transcript,
-                                                was_interrupted)
-
-        on_transcript_update: Inherited from BaseTranscriptProcessor, emitted for
-            individual transcript messages.
-
-    Example::
-
-        turn_processor = TurnAwareTranscriptProcessor()
-
-        @turn_processor.event_handler("on_turn_started")
-        async def handle_turn_started(processor, turn_number):
-            print(f"Turn {turn_number} started")
-
-        @turn_processor.event_handler("on_turn_ended")
-        async def handle_turn_ended(processor, turn_number, user_text, assistant_text, interrupted):
-            print(f"Turn {turn_number} ended")
-            print(f"User said: {user_text}")
-            print(f"Assistant said: {assistant_text}")
-            print(f"Was interrupted: {interrupted}")
-
-        pipeline = Pipeline([
-            transport.input(),
-            stt,
-            turn_processor,
-            context_aggregator.user(),
-            llm,
-            tts,
-            transport.output(),
-            context_aggregator.assistant(),
-        ])
-    """
-
-    def __init__(self, **kwargs):
-        """Initialize the turn-aware transcript processor.
-
-        Args:
-            **kwargs: Additional arguments passed to parent class.
-        """
-        super().__init__(**kwargs)
-
-        # Turn tracking state
-        self._turn_number = 0
-        self._turn_active = False
-        self._turn_start_time: Optional[str] = None
-
-        # Accumulate text for current turn
-        self._current_turn_user_parts: List[TextPartForConcatenation] = []
-        self._current_turn_assistant_parts: List[TextPartForConcatenation] = []
-
-        # Track bot speaking state
-        self._bot_is_speaking = False
-
-        # Register turn events
-        self._register_event_handler("on_turn_started")
-        self._register_event_handler("on_turn_ended")
-
-    async def _start_turn(self):
-        """Start a new turn."""
-        if not self._turn_active:
-            self._turn_number += 1
-            self._turn_active = True
-            self._turn_start_time = time_now_iso8601()
-            self._current_turn_user_parts = []
-            self._current_turn_assistant_parts = []
-
-            logger.debug(f"Turn {self._turn_number} started")
-            await self._call_event_handler("on_turn_started", self._turn_number)
-
-    async def _end_turn(self, was_interrupted: bool = False):
-        """End the current turn and emit aggregated transcripts.
-
-        Args:
-            was_interrupted: Whether the turn ended due to an interruption.
-        """
-        if not self._turn_active:
-            return
-
-        # Aggregate user text
-        user_transcript = ""
-        if self._current_turn_user_parts:
-            user_transcript = concatenate_aggregated_text(self._current_turn_user_parts)
-
-        # Aggregate assistant text
-        assistant_transcript = ""
-        if self._current_turn_assistant_parts:
-            assistant_transcript = concatenate_aggregated_text(self._current_turn_assistant_parts)
-
-        # Emit turn ended event
-        logger.debug(
-            f"Turn {self._turn_number} ended (interrupted={was_interrupted}). "
-            f"User: '{user_transcript}', Assistant: '{assistant_transcript}'"
-        )
-        await self._call_event_handler(
-            "on_turn_ended",
-            self._turn_number,
-            user_transcript,
-            assistant_transcript,
-            was_interrupted,
-        )
-
-        # Reset turn state
-        self._turn_active = False
-        self._current_turn_user_parts = []
-        self._current_turn_assistant_parts = []
-
-    async def process_frame(self, frame: Frame, direction: FrameDirection):
-        """Process frames for turn-aware transcript tracking.
-
-        Handles:
-        - UserStartedSpeakingFrame: Start new turn
-        - TranscriptionFrame: Accumulate user speech and emit transcript message
-        - BotStartedSpeakingFrame: Track bot speaking state
-        - TTSTextFrame: Accumulate assistant speech
-        - BotStoppedSpeakingFrame: End turn if no interruption pending
-        - InterruptionFrame: End turn immediately as interrupted
-        - EndFrame/CancelFrame: End any active turn
-
-        Args:
-            frame: Input frame to process.
-            direction: Frame processing direction.
-        """
-        await super().process_frame(frame, direction)
-
-        if isinstance(frame, UserStartedSpeakingFrame):
-            # User started speaking
-            if self._bot_is_speaking:
-                # This is an interruption - end the current turn with what was spoken
-                if self._current_turn_assistant_parts:
-                    assistant_content = concatenate_aggregated_text(
-                        self._current_turn_assistant_parts
-                    )
-                    if assistant_content:
-                        message = TranscriptionMessage(
-                            role="assistant",
-                            content=assistant_content,
-                            timestamp=self._turn_start_time or time_now_iso8601(),
-                        )
-                        await self._emit_update([message])
-                await self._end_turn(was_interrupted=True)
-                self._bot_is_speaking = False
-            elif self._turn_active:
-                # Previous turn is ending normally (bot finished speaking)
-                if self._current_turn_assistant_parts:
-                    assistant_content = concatenate_aggregated_text(
-                        self._current_turn_assistant_parts
-                    )
-                    if assistant_content:
-                        message = TranscriptionMessage(
-                            role="assistant",
-                            content=assistant_content,
-                            timestamp=self._turn_start_time or time_now_iso8601(),
-                        )
-                        await self._emit_update([message])
-                await self._end_turn(was_interrupted=False)
-
-            # Start a new turn
-            await self._start_turn()
-            await self.push_frame(frame, direction)
-
-        elif isinstance(frame, TranscriptionFrame):
-            # Accumulate user speech for the current turn
-            if self._turn_active:
-                self._current_turn_user_parts.append(
-                    TextPartForConcatenation(frame.text, includes_inter_part_spaces=True)
-                )
-
-            # Also emit individual transcript message
-            message = TranscriptionMessage(
-                role="user",
-                user_id=frame.user_id,
-                content=frame.text,
-                timestamp=frame.timestamp,
-            )
-            await self._emit_update([message])
-            await self.push_frame(frame, direction)
-
-        elif isinstance(frame, BotStartedSpeakingFrame):
-            # Bot started speaking
-            self._bot_is_speaking = True
-            await self.push_frame(frame, direction)
-
-        elif isinstance(frame, TTSTextFrame):
-            # Accumulate assistant speech for the current turn
-            if self._turn_active:
-                self._current_turn_assistant_parts.append(
-                    TextPartForConcatenation(
-                        frame.text, includes_inter_part_spaces=frame.includes_inter_frame_spaces
-                    )
-                )
-            await self.push_frame(frame, direction)
-
-        elif isinstance(frame, BotStoppedSpeakingFrame):
-            # Bot stopped speaking - just mark it, don't end turn yet
-            # Turn will end when next user speaks or pipeline ends
-            self._bot_is_speaking = False
-            await self.push_frame(frame, direction)
-
-        elif isinstance(frame, InterruptionFrame):
-            # Emit assistant transcript message with what was spoken before interruption
-            if self._current_turn_assistant_parts:
-                assistant_content = concatenate_aggregated_text(self._current_turn_assistant_parts)
-                if assistant_content:
-                    message = TranscriptionMessage(
-                        role="assistant",
-                        content=assistant_content,
-                        timestamp=self._turn_start_time or time_now_iso8601(),
-                    )
-                    await self._emit_update([message])
-
-            # Push frame first to ensure proper cleanup
-            await self.push_frame(frame, direction)
-
-            # End turn as interrupted
-            await self._end_turn(was_interrupted=True)
-            self._bot_is_speaking = False
-
-        elif isinstance(frame, (EndFrame, CancelFrame)):
-            # Pipeline ending - finalize any active turn
-            if self._turn_active:
-                # Emit any pending assistant transcript (allow time for TTSTextFrames to be processed)
-                # Give a brief moment for any pending frames to process
-                import asyncio
-
-                await asyncio.sleep(0.001)
-
-                if self._current_turn_assistant_parts:
-                    assistant_content = concatenate_aggregated_text(
-                        self._current_turn_assistant_parts
-                    )
-                    if assistant_content:
-                        message = TranscriptionMessage(
-                            role="assistant",
-                            content=assistant_content,
-                            timestamp=self._turn_start_time or time_now_iso8601(),
-                        )
-                        await self._emit_update([message])
-
-                await self._end_turn(was_interrupted=isinstance(frame, CancelFrame))
-
-            await self.push_frame(frame, direction)
-
-        else:
-            await self.push_frame(frame, direction)
--- a/src/pipecat/runner/run.py
+++ b/src/pipecat/runner/run.py
@@ -302,7 +302,7 @@ def _setup_webrtc_routes(
        result: StartBotResult = {"sessionId": session_id}
        if request_data.get("enableDefaultIceServers"):
            result["iceConfig"] = IceConfig(
-                iceServers=[IceServer(urls="stun:stun.l.google.com:19302")]
+                iceServers=[IceServer(urls=["stun:stun.l.google.com:19302"])]
            )

        return result
--- a/src/pipecat/serializers/plivo.py
+++ b/src/pipecat/serializers/plivo.py
@@ -199,7 +199,7 @@ class PlivoFrameSerializer(FrameSerializer):
                        )

        except Exception as e:
-            logger.exception(f"Failed to hang up Plivo call: {e}")
+            logger.error(f"Failed to hang up Plivo call: {e}")

    async def deserialize(self, data: str | bytes) -> Frame | None:
        """Deserializes Plivo WebSocket data to Pipecat frames.
--- a/src/pipecat/serializers/telnyx.py
+++ b/src/pipecat/serializers/telnyx.py
@@ -225,7 +225,7 @@ class TelnyxFrameSerializer(FrameSerializer):
                        )

        except Exception as e:
-            logger.exception(f"Failed to hang up Telnyx call: {e}")
+            logger.error(f"Failed to hang up Telnyx call: {e}")

    async def deserialize(self, data: str | bytes) -> Frame | None:
        """Deserializes Telnyx WebSocket data to Pipecat frames.
--- a/src/pipecat/serializers/twilio.py
+++ b/src/pipecat/serializers/twilio.py
@@ -236,7 +236,7 @@ class TwilioFrameSerializer(FrameSerializer):
                        )

        except Exception as e:
-            logger.exception(f"Failed to hang up Twilio call: {e}")
+            logger.error(f"Failed to hang up Twilio call: {e}")

    async def deserialize(self, data: str | bytes) -> Frame | None:
        """Deserializes Twilio WebSocket data to Pipecat frames.
--- a/src/pipecat/services/ai_service.py
+++ b/src/pipecat/services/ai_service.py
@@ -166,6 +166,6 @@ class AIService(FrameProcessor):
        async for f in generator:
            if f:
                if isinstance(f, ErrorFrame):
-                    await self.push_error(f)
+                    await self.push_error_frame(f)
                else:
                    await self.push_frame(f)
--- a/src/pipecat/services/anthropic/llm.py
+++ b/src/pipecat/services/anthropic/llm.py
@@ -17,7 +17,7 @@ import io
 import json
 import re
 from dataclasses import dataclass
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Literal, Optional, Union

 import httpx
 from loguru import logger
@@ -40,6 +40,9 @@ from pipecat.frames.frames import (
    LLMFullResponseStartFrame,
    LLMMessagesFrame,
    LLMTextFrame,
+    LLMThoughtEndFrame,
+    LLMThoughtStartFrame,
+    LLMThoughtTextFrame,
    LLMUpdateSettingsFrame,
    UserImageRawFrame,
 )
@@ -110,6 +113,24 @@ class AnthropicLLMService(LLMService):
    # Overriding the default adapter to use the Anthropic one.
    adapter_class = AnthropicLLMAdapter

+    class ThinkingConfig(BaseModel):
+        """Configuration for extended thinking.
+
+        Parameters:
+            type: Type of thinking mode (currently only "enabled" or "disabled").
+            budget_tokens: Maximum number of tokens for thinking.
+                With today's models, the minimum is 1024.
+                Only allowed if type is "enabled".
+        """
+
+        # Why `| str` here? To not break compatibility in case Anthropic adds
+        # more types in the future.
+        type: Literal["enabled", "disabled"] | str
+
+        # Why not enforce minimum of 1024 here? To not break compatibility in
+        # case Anthropic changes this requirement in the future.
+        budget_tokens: int
+
    class InputParams(BaseModel):
        """Input parameters for Anthropic model inference.

@@ -124,6 +145,10 @@ class AnthropicLLMService(LLMService):
            temperature: Sampling temperature between 0.0 and 1.0.
            top_k: Top-k sampling parameter.
            top_p: Top-p sampling parameter between 0.0 and 1.0.
+            thinking: Extended thinking configuration.
+                Enabling extended thinking causes the model to spend more time "thinking" before responding.
+                It also causes this service to emit LLMThought*Frames during response generation.
+                Extended thinking is disabled by default.
            extra: Additional parameters to pass to the API.
        """

@@ -133,6 +158,9 @@ class AnthropicLLMService(LLMService):
        temperature: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0)
        top_k: Optional[int] = Field(default_factory=lambda: NOT_GIVEN, ge=0)
        top_p: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0)
+        thinking: Optional["AnthropicLLMService.ThinkingConfig"] = Field(
+            default_factory=lambda: NOT_GIVEN
+        )
        extra: Optional[Dict[str, Any]] = Field(default_factory=dict)

        def model_post_init(self, __context):
@@ -191,6 +219,7 @@ class AnthropicLLMService(LLMService):
            "temperature": params.temperature,
            "top_k": params.top_k,
            "top_p": params.top_p,
+            "thinking": params.thinking,
            "extra": params.extra if isinstance(params.extra, dict) else {},
        }

@@ -354,12 +383,21 @@ class AnthropicLLMService(LLMService):
                "top_p": self._settings["top_p"],
            }

+            # Add thinking parameter if set
+            if self._settings["thinking"]:
+                params["thinking"] = self._settings["thinking"].model_dump(exclude_unset=True)
+
            # Messages, system, tools
            params.update(params_from_context)

            params.update(self._settings["extra"])

-            response = await self._create_message_stream(self._client.messages.create, params)
+            # "Interleaved thinking" needed to allow thinking between sequences
+            # of function calls, when extended thinking is enabled.
+            # Note that this requires us to use `client.beta`, below.
+            params.update({"betas": ["interleaved-thinking-2025-05-14"]})
+
+            response = await self._create_message_stream(self._client.beta.messages.create, params)

            await self.stop_ttfb_metrics()

@@ -380,10 +418,21 @@ class AnthropicLLMService(LLMService):
                        completion_tokens_estimate += self._estimate_tokens(
                            event.delta.partial_json
                        )
+                    elif hasattr(event.delta, "thinking"):
+                        await self.push_frame(LLMThoughtTextFrame(text=event.delta.thinking))
+                    elif hasattr(event.delta, "signature"):
+                        await self.push_frame(LLMThoughtEndFrame(signature=event.delta.signature))
                elif event.type == "content_block_start":
                    if event.content_block.type == "tool_use":
                        tool_use_block = event.content_block
                        json_accumulator = ""
+                    elif event.content_block.type == "thinking":
+                        await self.push_frame(
+                            LLMThoughtStartFrame(
+                                append_to_context=True,
+                                llm=self.get_llm_adapter().id_for_llm_specific_messages,
+                            )
+                        )
                elif (
                    event.type == "message_delta"
                    and hasattr(event.delta, "stop_reason")
@@ -458,8 +507,7 @@ class AnthropicLLMService(LLMService):
        except httpx.TimeoutException:
            await self._call_event_handler("on_completion_timeout")
        except Exception as e:
-            logger.exception(f"{self} exception: {e}")
-            await self.push_error(ErrorFrame(f"{e}"))
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
        finally:
            await self.stop_processing_metrics()
            await self.push_frame(LLMFullResponseEndFrame())
--- a/src/pipecat/services/assemblyai/stt.py
+++ b/src/pipecat/services/assemblyai/stt.py
@@ -206,9 +206,8 @@ class AssemblyAISTTService(STTService):

            await self._call_event_handler("on_connected")
        except Exception as e:
-            logger.error(f"{self} exception: {e}")
            self._connected = False
-            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
            raise

    async def _disconnect(self):
@@ -233,8 +232,7 @@ class AssemblyAISTTService(STTService):
                    logger.warning("Timed out waiting for termination message from server")

            except Exception as e:
-                logger.error(f"{self} exception: {e}")
-                await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+                await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)

            if self._receive_task:
                await self.cancel_task(self._receive_task)
@@ -242,8 +240,7 @@ class AssemblyAISTTService(STTService):
            await self._websocket.close()

        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)

        finally:
            self._websocket = None
@@ -262,13 +259,11 @@ class AssemblyAISTTService(STTService):
                except websockets.exceptions.ConnectionClosedOK:
                    break
                except Exception as e:
-                    logger.error(f"{self} exception: {e}")
-                    await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+                    await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
                    break

        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)

    def _parse_message(self, message: Dict[str, Any]) -> BaseMessage:
        """Parse a raw message into the appropriate message type."""
@@ -297,8 +292,7 @@ class AssemblyAISTTService(STTService):
            elif isinstance(parsed_message, TerminationMessage):
                await self._handle_termination(parsed_message)
        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)

    async def _handle_termination(self, message: TerminationMessage):
        """Handle termination message."""
--- a/src/pipecat/services/asyncai/tts.py
+++ b/src/pipecat/services/asyncai/tts.py
@@ -56,6 +56,17 @@ def language_to_async_language(language: Language) -> Optional[str]:
        Language.ES: "es",
        Language.DE: "de",
        Language.IT: "it",
+        Language.PT: "pt",
+        Language.NL: "nl",
+        Language.AR: "ar",
+        Language.RU: "ru",
+        Language.RO: "ro",
+        Language.JA: "ja",
+        Language.HE: "he",
+        Language.HY: "hy",
+        Language.TR: "tr",
+        Language.HI: "hi",
+        Language.ZH: "zh",
    }

    return resolve_language(language, LANGUAGE_MAP, use_base_code=True)
@@ -74,7 +85,7 @@ class AsyncAITTSService(InterruptibleTTSService):
            language: Language to use for synthesis.
        """

-        language: Optional[Language] = Language.EN
+        language: Optional[Language] = None

    def __init__(
        self,
@@ -83,7 +94,7 @@ class AsyncAITTSService(InterruptibleTTSService):
        voice_id: str,
        version: str = "v1",
        url: str = "wss://api.async.ai/text_to_speech/websocket/ws",
-        model: str = "asyncflow_v2.0",
+        model: str = "asyncflow_multilingual_v1.0",
        sample_rate: Optional[int] = None,
        encoding: str = "pcm_s16le",
        container: str = "raw",
@@ -99,7 +110,7 @@ class AsyncAITTSService(InterruptibleTTSService):
                https://docs.async.ai/list-voices-16699698e0
            version: Async API version.
            url: WebSocket URL for Async TTS API.
-            model: TTS model to use (e.g., "asyncflow_v2.0").
+            model: TTS model to use (e.g., "asyncflow_multilingual_v1.0").
            sample_rate: Audio sample rate.
            encoding: Audio encoding format.
            container: Audio container format.
@@ -128,7 +139,7 @@ class AsyncAITTSService(InterruptibleTTSService):
            },
            "language": self.language_to_service_language(params.language)
            if params.language
-            else "en",
+            else None,
        }

        self.set_model_name(model)
@@ -228,8 +239,7 @@ class AsyncAITTSService(InterruptibleTTSService):

            await self._call_event_handler("on_connected")
        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
            self._websocket = None
            await self._call_event_handler("on_connection_error", f"{e}")

@@ -241,8 +251,7 @@ class AsyncAITTSService(InterruptibleTTSService):
                logger.debug("Disconnecting from Async")
                await self._websocket.close()
        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
        finally:
            self._websocket = None
            self._started = False
@@ -287,12 +296,11 @@ class AsyncAITTSService(InterruptibleTTSService):
                )
                await self.push_frame(frame)
            elif msg.get("error_code"):
-                logger.error(f"{self} error: {msg}")
                await self.push_frame(TTSStoppedFrame())
                await self.stop_all_metrics()
-                await self.push_error(ErrorFrame(error=f"{self} error: {msg['message']}"))
+                await self.push_error(error_msg=f"Error: {msg['message']}")
            else:
-                logger.error(f"{self} error, unknown message type: {msg}")
+                await self.push_error(error_msg=f"Unknown message type: {msg}")

    async def _keepalive_task_handler(self):
        """Send periodic keepalive messages to maintain WebSocket connection."""
@@ -335,16 +343,14 @@ class AsyncAITTSService(InterruptibleTTSService):
                await self._get_websocket().send(msg)
                await self.start_tts_usage_metrics(text)
            except Exception as e:
-                logger.error(f"{self} exception: {e}")
-                yield ErrorFrame(error=f"{self} error: {e}")
+                yield ErrorFrame(error=f"Unknown error occurred: {e}")
                yield TTSStoppedFrame()
                await self._disconnect()
                await self._connect()
                return
            yield None
        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            yield ErrorFrame(error=f"{self} error: {e}")
+            yield ErrorFrame(error=f"Unknown error occurred: {e}")


 class AsyncAIHttpTTSService(TTSService):
@@ -362,7 +368,7 @@ class AsyncAIHttpTTSService(TTSService):
            language: Language to use for synthesis.
        """

-        language: Optional[Language] = Language.EN
+        language: Optional[Language] = None

    def __init__(
        self,
@@ -370,7 +376,7 @@ class AsyncAIHttpTTSService(TTSService):
        api_key: str,
        voice_id: str,
        aiohttp_session: aiohttp.ClientSession,
-        model: str = "asyncflow_v2.0",
+        model: str = "asyncflow_multilingual_v1.0",
        url: str = "https://api.async.ai",
        version: str = "v1",
        sample_rate: Optional[int] = None,
@@ -385,7 +391,7 @@ class AsyncAIHttpTTSService(TTSService):
            api_key: Async API key.
            voice_id: ID of the voice to use for synthesis.
            aiohttp_session: An aiohttp session for making HTTP requests.
-            model: TTS model to use (e.g., "asyncflow_v2.0").
+            model: TTS model to use (e.g., "asyncflow_multilingual_v1.0").
            url: Base URL for Async API.
            version: API version string for Async API.
            sample_rate: Audio sample rate.
@@ -409,7 +415,7 @@ class AsyncAIHttpTTSService(TTSService):
            },
            "language": self.language_to_service_language(params.language)
            if params.language
-            else "en",
+            else None,
        }
        self.set_voice(voice_id)
        self.set_model_name(model)
@@ -477,8 +483,7 @@ class AsyncAIHttpTTSService(TTSService):
            async with self._session.post(url, json=payload, headers=headers) as response:
                if response.status != 200:
                    error_text = await response.text()
-                    logger.error(f"Async API error: {error_text}")
-                    await self.push_error(ErrorFrame(error=f"Async API error: {error_text}"))
+                    await self.push_error(error_msg=f"Async API error: {error_text}")
                    raise Exception(f"Async API returned status {response.status}: {error_text}")

                audio_data = await response.read()
@@ -494,8 +499,7 @@ class AsyncAIHttpTTSService(TTSService):
            yield frame

        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
        finally:
            await self.stop_ttfb_metrics()
            yield TTSStoppedFrame()
--- a/src/pipecat/services/aws/llm.py
+++ b/src/pipecat/services/aws/llm.py
@@ -734,7 +734,7 @@ class AWSBedrockLLMService(LLMService):
        aws_access_key: Optional[str] = None,
        aws_secret_key: Optional[str] = None,
        aws_session_token: Optional[str] = None,
-        aws_region: str = "us-east-1",
+        aws_region: Optional[str] = None,
        params: Optional[InputParams] = None,
        client_config: Optional[Config] = None,
        retry_timeout_secs: Optional[float] = 5.0,
@@ -1136,7 +1136,7 @@ class AWSBedrockLLMService(LLMService):
        except (ReadTimeoutError, asyncio.TimeoutError):
            await self._call_event_handler("on_completion_timeout")
        except Exception as e:
-            logger.exception(f"{self} exception: {e}")
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
        finally:
            await self.stop_processing_metrics()
            await self.push_frame(LLMFullResponseEndFrame())
--- a/src/pipecat/services/aws/nova_sonic/llm.py
+++ b/src/pipecat/services/aws/nova_sonic/llm.py
@@ -157,6 +157,12 @@ class Params(BaseModel):
        max_tokens: Maximum number of tokens to generate.
        top_p: Nucleus sampling parameter.
        temperature: Sampling temperature for text generation.
+        endpointing_sensitivity: Controls how quickly Nova Sonic decides the
+            user has stopped speaking. Can be "LOW", "MEDIUM", or "HIGH", with
+            "HIGH" being the most sensitive (i.e., causing the model to respond
+            most quickly).
+            If not set, uses the model's default behavior.
+            Only supported with Nova 2 Sonic (the default model).
    """

    # Audio input
@@ -174,6 +180,9 @@ class Params(BaseModel):
    top_p: Optional[float] = Field(default=0.9)
    temperature: Optional[float] = Field(default=0.7)

+    # Turn-taking
+    endpointing_sensitivity: Optional[str] = Field(default=None)
+

 class AWSNovaSonicLLMService(LLMService):
    """AWS Nova Sonic speech-to-speech LLM service.
@@ -192,8 +201,8 @@ class AWSNovaSonicLLMService(LLMService):
        access_key_id: str,
        session_token: Optional[str] = None,
        region: str,
-        model: str = "amazon.nova-sonic-v1:0",
-        voice_id: str = "matthew",  # matthew, tiffany, amy
+        model: str = "amazon.nova-2-sonic-v1:0",
+        voice_id: str = "matthew",
        params: Optional[Params] = None,
        system_instruction: Optional[str] = None,
        tools: Optional[ToolsSchema] = None,
@@ -207,8 +216,15 @@ class AWSNovaSonicLLMService(LLMService):
            access_key_id: AWS access key ID for authentication.
            session_token: AWS session token for authentication.
            region: AWS region where the service is hosted.
-            model: Model identifier. Defaults to "amazon.nova-sonic-v1:0".
-            voice_id: Voice ID for speech synthesis. Options: matthew, tiffany, amy.
+                Supported regions:
+                - Nova 2 Sonic (the default model): "us-east-1", "us-west-2", "ap-northeast-1"
+                - Nova Sonic (the older model): "us-east-1", "ap-northeast-1"
+            model: Model identifier. Defaults to "amazon.nova-2-sonic-v1:0".
+            voice_id: Voice ID for speech synthesis.
+                Note that some voices are designed for use with a specific language.
+                Options:
+                - Nova 2 Sonic (the default model): see https://docs.aws.amazon.com/nova/latest/nova2-userguide/sonic-language-support.html
+                - Nova Sonic (the older model): see https://docs.aws.amazon.com/nova/latest/userguide/available-voices.html.
            params: Model parameters for audio configuration and inference.
            system_instruction: System-level instruction for the model.
            tools: Available tools/functions for the model to use.
@@ -232,6 +248,17 @@ class AWSNovaSonicLLMService(LLMService):
        self._system_instruction = system_instruction
        self._tools = tools

+        # Validate endpointing_sensitivity parameter
+        if (
+            self._params.endpointing_sensitivity
+            and not self._is_endpointing_sensitivity_supported()
+        ):
+            logger.warning(
+                f"endpointing_sensitivity is not supported for model '{model}' and will be ignored. "
+                "This parameter is only supported starting with Nova 2 Sonic (amazon.nova-2-sonic-v1:0)."
+            )
+            self._params.endpointing_sensitivity = None
+
        if not send_transcription_frames:
            import warnings

@@ -453,13 +480,13 @@ class AWSNovaSonicLLMService(LLMService):
            self._ready_to_send_context = True
            await self._finish_connecting_if_context_available()
        except Exception as e:
-            logger.error(f"{self} initialization error: {e}")
+            await self.push_error(error_msg=f"Initialization error: {e}", exception=e)
            await self._disconnect()

    async def _process_completed_function_calls(self, send_new_results: bool):
        # Check for set of completed function calls in the context
        for message in self._context.get_messages():
-            if message.get("role") and message.get("content") != "IN_PROGRESS":
+            if message.get("role") and message.get("content") not in ["IN_PROGRESS", "CANCELLED"]:
                tool_call_id = message.get("tool_call_id")
                if tool_call_id and tool_call_id not in self._completed_tool_calls:
                    # Found a newly-completed function call - send the result to the service
@@ -577,7 +604,7 @@ class AWSNovaSonicLLMService(LLMService):

            logger.info("Finished disconnecting")
        except Exception as e:
-            logger.error(f"{self} error disconnecting: {e}")
+            await self.push_error(error_msg=f"Error disconnecting: {e}", exception=e)

    def _create_client(self) -> BedrockRuntimeClient:
        config = Config(
@@ -591,11 +618,33 @@ class AWSNovaSonicLLMService(LLMService):
        )
        return BedrockRuntimeClient(config=config)

+    def _is_first_generation_sonic_model(self) -> bool:
+        # Nova Sonic (the older model) is identified by "amazon.nova-sonic-v1:0"
+        return self._model == "amazon.nova-sonic-v1:0"
+
+    def _is_endpointing_sensitivity_supported(self) -> bool:
+        # endpointing_sensitivity is only supported with Nova 2 Sonic (and,
+        # presumably, future models)
+        return not self._is_first_generation_sonic_model()
+
+    def _is_assistant_response_trigger_needed(self) -> bool:
+        # Assistant response trigger audio is only needed with the older model
+        return self._is_first_generation_sonic_model()
+
    #
    # LLM communication: input events (pipecat -> LLM)
    #

    async def _send_session_start_event(self):
+        turn_detection_config = (
+            f""",
+              "turnDetectionConfiguration": {{
+                "endpointingSensitivity": "{self._params.endpointing_sensitivity}"
+              }}"""
+            if self._params.endpointing_sensitivity
+            else ""
+        )
+
        session_start = f"""
        {{
          "event": {{
@@ -604,7 +653,7 @@ class AWSNovaSonicLLMService(LLMService):
                "maxTokens": {self._params.max_tokens},
                "topP": {self._params.top_p},
                "temperature": {self._params.temperature}
-              }}
+              }}{turn_detection_config}
            }}
          }}
        }}
@@ -885,7 +934,7 @@ class AWSNovaSonicLLMService(LLMService):
                # Errors are kind of expected while disconnecting, so just
                # ignore them and do nothing
                return
-            logger.error(f"{self} error processing responses: {e}")
+            await self.push_error(error_msg=f"Error processing responses: {e}", exception=e)
            if self._wants_connection:
                await self.reset_conversation()

@@ -1189,7 +1238,8 @@ class AWSNovaSonicLLMService(LLMService):
        )

    #
-    # assistant response trigger (HACK)
+    # assistant response trigger
+    # HACK: only needed for the older Nova Sonic (as opposed to Nova 2 Sonic) model
    #

    # Class variable
@@ -1203,12 +1253,17 @@ class AWSNovaSonicLLMService(LLMService):

        Sends a pre-recorded "ready" audio trigger to prompt the assistant
        to start speaking. This is useful for controlling conversation flow.
-
-        Returns:
-            False if already triggering a response, True otherwise.
        """
+        if not self._is_assistant_response_trigger_needed():
+            logger.warning(
+                f"Assistant response trigger not needed for model '{self._model}'; skipping. "
+                "An LLMRunFrame() should be sufficient to prompt the assistant to respond, "
+                "assuming the context ends in a user message."
+            )
+            return
+
        if self._triggering_assistant_response:
-            return False
+            return

        self._triggering_assistant_response = True

--- a/src/pipecat/services/aws/stt.py
+++ b/src/pipecat/services/aws/stt.py
@@ -58,7 +58,7 @@ class AWSTranscribeSTTService(STTService):
        api_key: Optional[str] = None,
        aws_access_key_id: Optional[str] = None,
        aws_session_token: Optional[str] = None,
-        region: Optional[str] = "us-east-1",
+        region: Optional[str] = None,
        sample_rate: int = 16000,
        language: Language = Language.EN,
        **kwargs,
@@ -69,7 +69,7 @@ class AWSTranscribeSTTService(STTService):
            api_key: AWS secret access key. If None, uses AWS_SECRET_ACCESS_KEY environment variable.
            aws_access_key_id: AWS access key ID. If None, uses AWS_ACCESS_KEY_ID environment variable.
            aws_session_token: AWS session token for temporary credentials. If None, uses AWS_SESSION_TOKEN environment variable.
-            region: AWS region for the service. Defaults to "us-east-1".
+            region: AWS region for the service.
            sample_rate: Audio sample rate in Hz. Must be 8000 or 16000. Defaults to 16000.
            language: Language for transcription. Defaults to English.
            **kwargs: Additional arguments passed to parent STTService class.
@@ -140,8 +140,7 @@ class AWSTranscribeSTTService(STTService):
                    return
                logger.warning("WebSocket connection not established after connect")
            except Exception as e:
-                logger.error(f"{self} exception: {e}")
-                await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+                await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
                retry_count += 1
                if retry_count < max_retries:
                    await asyncio.sleep(1)  # Wait before retrying
@@ -182,8 +181,7 @@ class AWSTranscribeSTTService(STTService):
                try:
                    await self._connect()
                except Exception as e:
-                    logger.error(f"{self} exception: {e}")
-                    yield ErrorFrame(error=f"{self} error: {e}")
+                    yield ErrorFrame(error=f"Unknown error occurred: {e}")
                    return

            # Format the audio data according to AWS event stream format
@@ -200,13 +198,11 @@ class AWSTranscribeSTTService(STTService):
                await self._disconnect()
                # Don't yield error here - we'll retry on next frame
            except Exception as e:
-                logger.error(f"{self} exception: {e}")
-                yield ErrorFrame(error=f"{self} error: {e}")
+                yield ErrorFrame(error=f"Unknown error occurred: {e}")
                await self._disconnect()

        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            yield ErrorFrame(error=f"{self} error: {e}")
+            yield ErrorFrame(error=f"Unknown error occurred: {e}")
            await self._disconnect()

    async def _connect(self):
@@ -289,8 +285,7 @@ class AWSTranscribeSTTService(STTService):

                await self._call_event_handler("on_connected")
            except Exception as e:
-                logger.error(f"{self} exception: {e}")
-                await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+                await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
                await self._disconnect()
                raise

@@ -310,8 +305,7 @@ class AWSTranscribeSTTService(STTService):
                await self._ws_client.send(json.dumps(end_stream))
            await self._ws_client.close()
        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
        finally:
            self._ws_client = None
            await self._call_event_handler("on_disconnected")
@@ -529,15 +523,15 @@ class AWSTranscribeSTTService(STTService):
                                    )
                elif headers.get(":message-type") == "exception":
                    error_msg = payload.get("Message", "Unknown error")
-                    logger.error(f"{self} Exception from AWS: {error_msg}")
-                    await self.push_frame(ErrorFrame(f"AWS Transcribe error: {error_msg}"))
+                    await self.push_error(error_msg=f"AWS Transcribe error: {error_msg}")
                else:
                    logger.debug(f"{self} Other message type received: {headers}")
                    logger.debug(f"{self} Payload: {payload}")
            except websockets.exceptions.ConnectionClosed as e:
-                logger.error(f"{self} WebSocket connection closed in receive loop: {e}")
+                await self.push_error(
+                    error_msg=f"WebSocket connection closed in receive loop", exception=e
+                )
                break
            except Exception as e:
-                logger.error(f"{self} exception: {e}")
-                await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+                await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
                break
--- a/src/pipecat/services/aws/tts.py
+++ b/src/pipecat/services/aws/tts.py
@@ -312,7 +312,6 @@ class AWSPollyTTSService(TTSService):

                yield TTSStoppedFrame()
        except (BotoCoreError, ClientError) as error:
-            logger.exception(f"{self} error generating TTS: {error}")
            error_message = f"AWS Polly TTS error: {str(error)}"
            yield ErrorFrame(error=error_message)

--- a/src/pipecat/services/azure/image.py
+++ b/src/pipecat/services/azure/image.py
@@ -91,7 +91,6 @@ class AzureImageGenServiceREST(ImageGenService):
            while status != "succeeded":
                attempts_left -= 1
                if attempts_left == 0:
-                    logger.error(f"{self} error: image generation timed out")
                    yield ErrorFrame("Image generation timed out")
                    return

@@ -104,7 +103,6 @@ class AzureImageGenServiceREST(ImageGenService):

            image_url = json_response["result"]["data"][0]["url"] if json_response else None
            if not image_url:
-                logger.error(f"{self} error: image generation failed")
                yield ErrorFrame("Image generation failed")
                return

--- a/src/pipecat/services/azure/realtime/llm.py
+++ b/src/pipecat/services/azure/realtime/llm.py
@@ -61,5 +61,5 @@ class AzureRealtimeLLMService(OpenAIRealtimeLLMService):
            )
            self._receive_task = self.create_task(self._receive_task_handler())
        except Exception as e:
-            logger.error(f"{self} initialization error: {e}")
+            await self.push_error(error_msg=f"initialization error: {e}", exception=e)
            self._websocket = None
--- a/src/pipecat/services/azure/stt.py
+++ b/src/pipecat/services/azure/stt.py
@@ -121,8 +121,7 @@ class AzureSTTService(STTService):
                self._audio_stream.write(audio)
            yield None
        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            yield ErrorFrame(error=f"{self} error: {e}")
+            yield ErrorFrame(error=f"Unknown error occurred: {e}")

    async def start(self, frame: StartFrame):
        """Start the speech recognition service.
@@ -151,8 +150,9 @@ class AzureSTTService(STTService):
            self._speech_recognizer.recognized.connect(self._on_handle_recognized)
            self._speech_recognizer.start_continuous_recognition_async()
        except Exception as e:
-            logger.error(f"{self} exception during initialization: {e}")
-            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            await self.push_error(
+                error_msg=f"Uncaught exception during initialization: {e}", exception=e
+            )

    async def stop(self, frame: EndFrame):
        """Stop the speech recognition service.
--- a/src/pipecat/services/azure/tts.py
+++ b/src/pipecat/services/azure/tts.py
@@ -327,7 +327,6 @@ class AzureTTSService(AzureBaseTTSService):
        try:
            if self._speech_synthesizer is None:
                error_msg = "Speech synthesizer not initialized."
-                logger.error(error_msg)
                yield ErrorFrame(error=error_msg)
                return

@@ -355,15 +354,13 @@ class AzureTTSService(AzureBaseTTSService):
                yield TTSStoppedFrame()

            except Exception as e:
-                logger.error(f"{self} exception: {e}")
-                yield ErrorFrame(error=f"{self} error: {e}")
+                yield ErrorFrame(error=f"Unknown error occurred: {e}")
                yield TTSStoppedFrame()
                # Could add reconnection logic here if needed
                return

        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            yield ErrorFrame(error=f"{self} error: {e}")
+            yield ErrorFrame(error=f"Unknown error occurred: {e}")


 class AzureHttpTTSService(AzureBaseTTSService):
@@ -440,5 +437,6 @@ class AzureHttpTTSService(AzureBaseTTSService):
            cancellation_details = result.cancellation_details
            logger.warning(f"Speech synthesis canceled: {cancellation_details.reason}")
            if cancellation_details.reason == CancellationReason.Error:
-                logger.error(f"{self} error: {cancellation_details.error_details}")
-                yield ErrorFrame(error=f"{self} error: {cancellation_details.error_details}")
+                yield ErrorFrame(
+                    error=f"Unknown error occurred: {cancellation_details.error_details}"
+                )
--- a/src/pipecat/services/cartesia/stt.py
+++ b/src/pipecat/services/cartesia/stt.py
@@ -10,7 +10,6 @@ This module provides a WebSocket-based STT service that integrates with
 the Cartesia Live transcription API for real-time speech recognition.
 """

-import asyncio
 import json
 import urllib.parse
 from typing import AsyncGenerator, Optional
@@ -20,7 +19,6 @@ from loguru import logger
 from pipecat.frames.frames import (
    CancelFrame,
    EndFrame,
-    ErrorFrame,
    Frame,
    InterimTranscriptionFrame,
    StartFrame,
@@ -160,20 +158,16 @@ class CartesiaSTTService(WebsocketSTTService):
            sample_rate=sample_rate,
        )

-        merged_options = default_options
+        merged_options = default_options.to_dict()
        if live_options:
-            merged_options_dict = default_options.to_dict()
-            merged_options_dict.update(live_options.to_dict())
-            merged_options = CartesiaLiveOptions(
-                **{
-                    k: v
-                    for k, v in merged_options_dict.items()
-                    if not isinstance(v, str) or v != "None"
-                }
-            )
+            merged_options.update(live_options.to_dict())
+            # Filter out "None" string values
+            merged_options = {
+                k: v for k, v in merged_options.items() if not isinstance(v, str) or v != "None"
+            }

        self._settings = merged_options
-        self.set_model_name(merged_options.model)
+        self.set_model_name(merged_options["model"])
        self._api_key = api_key
        self._base_url = base_url or "api.cartesia.ai"
        self._receive_task = None
@@ -254,7 +248,7 @@ class CartesiaSTTService(WebsocketSTTService):
        await self._connect_websocket()

        if self._websocket and not self._receive_task:
-            self._receive_task = asyncio.create_task(self._receive_task_handler(self._report_error))
+            self._receive_task = self.create_task(self._receive_task_handler(self._report_error))

    async def _disconnect(self):
        if self._receive_task:
@@ -269,15 +263,14 @@ class CartesiaSTTService(WebsocketSTTService):
                return
            logger.debug("Connecting to Cartesia STT")

-            params = self._settings.to_dict()
+            params = self._settings
            ws_url = f"wss://{self._base_url}/stt/websocket?{urllib.parse.urlencode(params)}"
            headers = {"Cartesia-Version": "2025-04-16", "X-API-Key": self._api_key}

            self._websocket = await websocket_connect(ws_url, additional_headers=headers)
            await self._call_event_handler("on_connected")
        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)

    async def _disconnect_websocket(self):
        try:
@@ -285,8 +278,7 @@ class CartesiaSTTService(WebsocketSTTService):
                logger.debug("Disconnecting from Cartesia STT")
                await self._websocket.close()
        except Exception as e:
-            logger.error(f"{self} error closing websocket: {e}")
-            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            await self.push_error(error_msg=f"Error closing websocket: {e}", exception=e)
        finally:
            self._websocket = None
            await self._call_event_handler("on_disconnected")
@@ -297,12 +289,15 @@ class CartesiaSTTService(WebsocketSTTService):
        raise Exception("Websocket not connected")

    async def _process_messages(self):
+        """Process incoming WebSocket messages."""
        async for message in self._get_websocket():
            try:
                data = json.loads(message)
                await self._process_response(data)
            except json.JSONDecodeError:
                logger.warning(f"Received non-JSON message: {message}")
+            except Exception as e:
+                logger.error(f"Error processing message: {e}")

    async def _receive_messages(self):
        while True:
@@ -319,8 +314,7 @@ class CartesiaSTTService(WebsocketSTTService):

            elif data["type"] == "error":
                error_msg = data.get("message", "Unknown error")
-                logger.error(f"Cartesia error: {error_msg}")
-                await self.push_error(ErrorFrame(error=error_msg))
+                await self.push_error(error_msg=error_msg)

    @traced_stt
    async def _handle_transcription(
@@ -352,6 +346,7 @@ class CartesiaSTTService(WebsocketSTTService):
                        self._user_id,
                        time_now_iso8601(),
                        language,
+                        result=data,
                    )
                )
                await self._handle_transcription(transcript, is_final, language)
@@ -364,5 +359,6 @@ class CartesiaSTTService(WebsocketSTTService):
                        self._user_id,
                        time_now_iso8601(),
                        language,
+                        result=data,
                    )
                )
--- a/src/pipecat/services/cartesia/tts.py
+++ b/src/pipecat/services/cartesia/tts.py
@@ -497,8 +497,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
            )
            await self._call_event_handler("on_connected")
        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
            self._websocket = None
            await self._call_event_handler("on_connection_error", f"{e}")

@@ -510,8 +509,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
                logger.debug("Disconnecting from Cartesia")
                await self._websocket.close()
        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
        finally:
            self._context_id = None
            self._websocket = None
@@ -564,13 +562,12 @@ class CartesiaTTSService(AudioContextWordTTSService):
                )
                await self.append_to_audio_context(msg["context_id"], frame)
            elif msg["type"] == "error":
-                logger.error(f"{self} error: {msg}")
                await self.push_frame(TTSStoppedFrame())
                await self.stop_all_metrics()
-                await self.push_error(ErrorFrame(error=f"{self} error: {msg['error']}"))
+                await self.push_error(error_msg=f"Error: {msg}")
                self._context_id = None
            else:
-                logger.error(f"{self} error, unknown message type: {msg}")
+                await self.push_error(error_msg=f"Error, unknown message type: {msg}")

    async def _receive_messages(self):
        while True:
@@ -608,16 +605,14 @@ class CartesiaTTSService(AudioContextWordTTSService):
                await self._get_websocket().send(msg)
                await self.start_tts_usage_metrics(text)
            except Exception as e:
-                logger.error(f"{self} exception: {e}")
-                yield ErrorFrame(error=f"{self} error: {e}")
+                yield ErrorFrame(error=f"Unknown error occurred: {e}")
                yield TTSStoppedFrame()
                await self._disconnect()
                await self._connect()
                return
            yield None
        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            yield ErrorFrame(error=f"{self} error: {e}")
+            yield ErrorFrame(error=f"Unknown error occurred: {e}")


 class CartesiaHttpTTSService(TTSService):
@@ -808,8 +803,7 @@ class CartesiaHttpTTSService(TTSService):
            async with session.post(url, json=payload, headers=headers) as response:
                if response.status != 200:
                    error_text = await response.text()
-                    logger.error(f"Cartesia API error: {error_text}")
-                    await self.push_error(ErrorFrame(error=f"Cartesia API error: {error_text}"))
+                    yield ErrorFrame(error=f"Cartesia API error: {error_text}")
                    raise Exception(f"Cartesia API returned status {response.status}: {error_text}")

                audio_data = await response.read()
@@ -825,8 +819,7 @@ class CartesiaHttpTTSService(TTSService):
            yield frame

        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            yield ErrorFrame(error=f"Unknown error occurred: {e}")
        finally:
            await self.stop_ttfb_metrics()
            yield TTSStoppedFrame()
--- a/src/pipecat/services/deepgram/flux/stt.py
+++ b/src/pipecat/services/deepgram/flux/stt.py
@@ -150,7 +150,17 @@ class DeepgramFluxSTTService(WebsocketSTTService):
                    params=params
                )
        """
-        super().__init__(sample_rate=sample_rate, **kwargs)
+        # Note: Unlike the other services, DeepgramFluxSTTService needs to create the
+        # _receive_task inside _connect_websocket, because the websocket should only be
+        # considered connected and ready to send audio once we receive the message from
+        # Flux that confirms the connection has been established.
+        # If we kept reconnect_on_error enabled, the _receive_task_handler would try to
+        # reconnect when an error occurs while receiving a message, invoking
+        # _connect_websocket again and leading to a case where the first
+        # _receive_task_handler is never destroyed.
+        # So we set reconnect_on_error to False here, because send_with_retry will
+        # already try to reconnect if needed.
+        super().__init__(sample_rate=sample_rate, reconnect_on_error=False, **kwargs)

        self._api_key = api_key
        self._url = url
@@ -183,14 +193,6 @@ class DeepgramFluxSTTService(WebsocketSTTService):
        """
        await self._connect_websocket()

-        # Creating the receiver task (only created once during initial connection)
-        if not self._receive_task:
-            self._receive_task = self.create_task(self._receive_task_handler(self._report_error))
-
-        # Creating the watchdog task (only created once during initial connection)
-        if not self._watchdog_task:
-            self._watchdog_task = self.create_task(self._watchdog_task_handler())
-
    async def _disconnect(self):
        """Disconnect from WebSocket and clean up tasks.

@@ -200,8 +202,7 @@ class DeepgramFluxSTTService(WebsocketSTTService):
        try:
            await self._disconnect_websocket()
        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
        finally:
            # Reset state only after everything is cleaned up
            self._websocket = None
@@ -243,14 +244,28 @@ class DeepgramFluxSTTService(WebsocketSTTService):
                additional_headers={"Authorization": f"Token {self._api_key}"},
            )

+            headers = {
+                k: v for k, v in self._websocket.response.headers.items() if k.startswith("dg-")
+            }
+            logger.debug(f'{self}: Websocket connection initialized: {{"headers": {headers}}}')
+
+            # Creating the receiver task
+            if not self._receive_task:
+                self._receive_task = self.create_task(
+                    self._receive_task_handler(self._report_error)
+                )
+
+            # Creating the watchdog task
+            if not self._watchdog_task:
+                self._watchdog_task = self.create_task(self._watchdog_task_handler())
+
            # Now wait for the connection established event
            logger.debug("WebSocket connected, waiting for server confirmation...")
            await self._connection_established_event.wait()
            logger.debug("Connected to Deepgram Flux Websocket")
            await self._call_event_handler("on_connected")
        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
            self._websocket = None
            await self._call_event_handler("on_connection_error", f"{e}")

@@ -278,8 +293,7 @@ class DeepgramFluxSTTService(WebsocketSTTService):
                logger.debug("Disconnecting from Deepgram Flux Websocket")
                await self._websocket.close()
        except Exception as e:
-            logger.error(f"{self} error closing websocket: {e}")
-            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            await self.push_error(error_msg=f"Error closing websocket: {e}", exception=e)
        finally:
            self._websocket = None
            await self._call_event_handler("on_disconnected")
@@ -289,10 +303,13 @@ class DeepgramFluxSTTService(WebsocketSTTService):

        This signals to the server that no more audio data will be sent.
        """
-        if self._websocket:
-            logger.debug("Sending CloseStream message to Deepgram Flux")
-            message = {"type": "CloseStream"}
-            await self._websocket.send(json.dumps(message))
+        try:
+            if self._websocket:
+                logger.debug("Sending CloseStream message to Deepgram Flux")
+                message = {"type": "CloseStream"}
+                await self._websocket.send(json.dumps(message))
+        except Exception as e:
+            await self.push_error(error_msg=f"Error sending closeStream: {e}", exception=e)

    def can_generate_metrics(self) -> bool:
        """Check if this service can generate processing metrics.
@@ -379,16 +396,13 @@ class DeepgramFluxSTTService(WebsocketSTTService):
                are issues sending the audio data.
        """
        if not self._websocket:
-            logger.error("Not connected to Deepgram Flux.")
-            yield ErrorFrame("Not connected to Deepgram Flux.")
            return

        try:
            self._last_stt_time = time.monotonic()
            await self.send_with_retry(audio, self._report_error)
        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            yield ErrorFrame(error=f"{self} error: {e}")
+            yield ErrorFrame(error=f"Unknown error occurred: {e}")
            return

        yield None
@@ -465,8 +479,7 @@ class DeepgramFluxSTTService(WebsocketSTTService):
                    # Skip malformed messages
                    continue
                except Exception as e:
-                    logger.error(f"{self} exception: {e}")
-                    await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+                    await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
                    # Error will be handled inside WebsocketService->_receive_task_handler
                    raise
            else:
--- a/src/pipecat/services/deepgram/stt.py
+++ b/src/pipecat/services/deepgram/stt.py
@@ -233,7 +233,14 @@ class DeepgramSTTService(STTService):
            )

        if not await self._connection.start(options=self._settings, addons=self._addons):
-            logger.error(f"{self}: unable to connect to Deepgram")
+            await self.push_error(error_msg=f"Unable to connect to Deepgram")
+        else:
+            headers = {
+                k: v
+                for k, v in self._connection._socket.response.headers.items()
+                if k.startswith("dg-")
+            }
+            logger.debug(f'{self}: Websocket connection initialized: {{"headers": {headers}}}')

    async def _disconnect(self):
        if await self._connection.is_connected():
@@ -256,7 +263,7 @@ class DeepgramSTTService(STTService):
    async def _on_error(self, *args, **kwargs):
        error: ErrorResponse = kwargs["error"]
        logger.warning(f"{self} connection error, will retry: {error}")
-        await self.push_error(ErrorFrame(error=f"{error}"))
+        await self.push_error(error_msg=f"{error}")
        await self.stop_all_metrics()
        # NOTE(aleix): we don't disconnect (i.e. call finish on the connection)
        # because this triggers more errors internally in the Deepgram SDK. So,
--- a/src/pipecat/services/deepgram/stt_sagemaker.py
+++ b/src/pipecat/services/deepgram/stt_sagemaker.py
@@ -210,8 +210,7 @@ class DeepgramSageMakerSTTService(STTService):
            try:
                await self._client.send_audio_chunk(audio)
            except Exception as e:
-                logger.error(f"Error sending audio to SageMaker: {e}")
-                await self.push_error(ErrorFrame(error=f"SageMaker STT error: {e}"))
+                yield ErrorFrame(error=f"Unknown error occurred: {e}")
        yield None

    async def _connect(self):
@@ -260,8 +259,7 @@ class DeepgramSageMakerSTTService(STTService):
            await self._call_event_handler("on_connected")

        except Exception as e:
-            logger.error(f"Failed to connect to SageMaker: {e}")
-            await self.push_error(ErrorFrame(error=f"SageMaker connection error: {e}"))
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
            await self._call_event_handler("on_connection_error", str(e))

    async def _disconnect(self):
@@ -342,8 +340,7 @@ class DeepgramSageMakerSTTService(STTService):
        except asyncio.CancelledError:
            logger.debug("Response processor cancelled")
        except Exception as e:
-            logger.error(f"Error processing responses: {e}", exc_info=True)
-            await self.push_error(ErrorFrame(error=f"SageMaker response error: {e}"))
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
        finally:
            logger.debug("Response processor stopped")

--- a/src/pipecat/services/deepgram/tts.py
+++ b/src/pipecat/services/deepgram/tts.py
@@ -10,35 +10,45 @@ This module provides integration with Deepgram's text-to-speech API
 for generating speech from text using various voice models.
 """

+import json
 from typing import AsyncGenerator, Optional

 import aiohttp
 from loguru import logger

 from pipecat.frames.frames import (
+    CancelFrame,
+    EndFrame,
    ErrorFrame,
    Frame,
+    InterruptionFrame,
+    LLMFullResponseEndFrame,
+    StartFrame,
    TTSAudioRawFrame,
    TTSStartedFrame,
    TTSStoppedFrame,
 )
-from pipecat.services.tts_service import TTSService
+from pipecat.processors.frame_processor import FrameDirection
+from pipecat.services.tts_service import TTSService, WebsocketTTSService
 from pipecat.utils.tracing.service_decorators import traced_tts

 try:
-    from deepgram import DeepgramClient, DeepgramClientOptions, SpeakOptions
+    from websockets.asyncio.client import connect as websocket_connect
+    from websockets.protocol import State
 except ModuleNotFoundError as e:
    logger.error(f"Exception: {e}")
-    logger.error("In order to use Deepgram, you need to `pip install pipecat-ai[deepgram]`.")
+    logger.error(
+        "In order to use DeepgramWebsocketTTSService, you need to `pip install pipecat-ai[deepgram]`."
+    )
    raise Exception(f"Missing module: {e}")


-class DeepgramTTSService(TTSService):
-    """Deepgram text-to-speech service.
+class DeepgramTTSService(WebsocketTTSService):
+    """Deepgram WebSocket-based text-to-speech service.

-    Provides text-to-speech synthesis using Deepgram's streaming API.
-    Supports various voice models and audio encoding formats with
-    configurable sample rates and quality settings.
+    Provides real-time text-to-speech synthesis using Deepgram's WebSocket API.
+    Supports streaming audio generation with interruption handling via the Clear
+    message for conversational AI use cases.
    """

    def __init__(
@@ -46,42 +56,220 @@ class DeepgramTTSService(TTSService):
        *,
        api_key: str,
        voice: str = "aura-2-helena-en",
-        base_url: str = "",
+        base_url: str = "wss://api.deepgram.com",
        sample_rate: Optional[int] = None,
        encoding: str = "linear16",
        **kwargs,
    ):
-        """Initialize the Deepgram TTS service.
+        """Initialize the Deepgram WebSocket TTS service.

        Args:
            api_key: Deepgram API key for authentication.
            voice: Voice model to use for synthesis. Defaults to "aura-2-helena-en".
-            base_url: Custom base URL for Deepgram API. Uses default if empty.
+            base_url: WebSocket base URL for Deepgram API. Defaults to "wss://api.deepgram.com".
            sample_rate: Audio sample rate in Hz. If None, uses service default.
            encoding: Audio encoding format. Defaults to "linear16".
-            **kwargs: Additional arguments passed to parent TTSService class.
+            **kwargs: Additional arguments passed to parent WebsocketTTSService class.
        """
-        super().__init__(sample_rate=sample_rate, **kwargs)
+        super().__init__(
+            sample_rate=sample_rate,
+            pause_frame_processing=True,
+            push_stop_frames=True,
+            **kwargs,
+        )

+        self._api_key = api_key
+        self._base_url = base_url
        self._settings = {
            "encoding": encoding,
        }
        self.set_voice(voice)

-        client_options = DeepgramClientOptions(url=base_url)
-        self._deepgram_client = DeepgramClient(api_key, config=client_options)
+        self._receive_task = None

    def can_generate_metrics(self) -> bool:
        """Check if the service can generate metrics.

        Returns:
-            True, as Deepgram TTS service supports metrics generation.
+            True, as Deepgram WebSocket TTS service supports metrics generation.
        """
        return True

+    async def start(self, frame: StartFrame):
+        """Start the Deepgram WebSocket TTS service.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
+        await super().start(frame)
+        await self._connect()
+
+    async def stop(self, frame: EndFrame):
+        """Stop the Deepgram WebSocket TTS service.
+
+        Args:
+            frame: The end frame.
+        """
+        await super().stop(frame)
+        await self._disconnect()
+
+    async def cancel(self, frame: CancelFrame):
+        """Cancel the Deepgram WebSocket TTS service.
+
+        Args:
+            frame: The cancel frame.
+        """
+        await super().cancel(frame)
+        await self._disconnect()
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process frames with special handling for LLM response end.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame processing.
+        """
+        await super().process_frame(frame, direction)
+
+        # When the LLM finishes responding, flush any remaining text in Deepgram's buffer
+        if isinstance(frame, (LLMFullResponseEndFrame, EndFrame)):
+            await self.flush_audio()
+
+    async def _connect(self):
+        """Connect to Deepgram WebSocket and start receive task."""
+        await self._connect_websocket()
+
+        if self._websocket and not self._receive_task:
+            self._receive_task = self.create_task(self._receive_task_handler(self._report_error))
+
+    async def _disconnect(self):
+        """Disconnect from Deepgram WebSocket and clean up tasks."""
+        if self._receive_task:
+            await self.cancel_task(self._receive_task)
+            self._receive_task = None
+
+        await self._disconnect_websocket()
+
+    async def _connect_websocket(self):
+        """Connect to Deepgram WebSocket API with configured settings."""
+        try:
+            if self._websocket and self._websocket.state is State.OPEN:
+                return
+
+            logger.debug("Connecting to Deepgram WebSocket")
+
+            # Build WebSocket URL with query parameters
+            params = []
+            params.append(f"model={self._voice_id}")
+            params.append(f"encoding={self._settings['encoding']}")
+            params.append(f"sample_rate={self.sample_rate}")
+
+            url = f"{self._base_url}/v1/speak?{'&'.join(params)}"
+
+            headers = {"Authorization": f"Token {self._api_key}"}
+
+            self._websocket = await websocket_connect(url, additional_headers=headers)
+
+            headers = {
+                k: v for k, v in self._websocket.response.headers.items() if k.startswith("dg-")
+            }
+            logger.debug(f'{self}: Websocket connection initialized: {{"headers": {headers}}}')
+
+            await self._call_event_handler("on_connected")
+        except Exception as e:
+            logger.error(f"{self} exception: {e}")
+            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            self._websocket = None
+            await self._call_event_handler("on_connection_error", f"{e}")
+
+    async def _disconnect_websocket(self):
+        """Close WebSocket connection and reset state."""
+        try:
+            await self.stop_all_metrics()
+
+            if self._websocket:
+                logger.debug("Disconnecting from Deepgram WebSocket")
+                # Send Close message to gracefully close the connection
+                await self._websocket.send(json.dumps({"type": "Close"}))
+                await self._websocket.close()
+        except Exception as e:
+            logger.error(f"{self} exception: {e}")
+            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+        finally:
+            self._websocket = None
+            await self._call_event_handler("on_disconnected")
+
+    def _get_websocket(self):
+        """Get active websocket connection or raise exception."""
+        if self._websocket:
+            return self._websocket
+        raise Exception("Websocket not connected")
+
+    async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
+        """Handle interruption by sending Clear message to Deepgram.
+
+        The Clear message will clear Deepgram's internal text buffer and stop
+        sending audio, allowing for a new response to be generated.
+        """
+        await super()._handle_interruption(frame, direction)
+
+        # Send Clear message to stop current audio generation
+        if self._websocket:
+            try:
+                clear_msg = {"type": "Clear"}
+                await self._websocket.send(json.dumps(clear_msg))
+            except Exception as e:
+                logger.error(f"{self} error sending Clear message: {e}")
+
+    async def _receive_messages(self):
+        """Receive and process messages from Deepgram WebSocket."""
+        async for message in self._get_websocket():
+            if isinstance(message, bytes):
+                # Binary message contains audio data
+                await self.stop_ttfb_metrics()
+                frame = TTSAudioRawFrame(message, self.sample_rate, 1)
+                await self.push_frame(frame)
+            elif isinstance(message, str):
+                # Text message contains metadata or control messages
+                try:
+                    msg = json.loads(message)
+                    msg_type = msg.get("type")
+
+                    if msg_type == "Metadata":
+                        logger.trace(f"Received metadata: {msg}")
+                    elif msg_type == "Flushed":
+                        logger.trace(f"Received Flushed: {msg}")
+                        # Flushed indicates the end of audio generation for the current buffer
+                        # This happens after flush_audio() is called
+                    elif msg_type == "Cleared":
+                        logger.trace(f"Received Cleared: {msg}")
+                        # Buffer has been cleared after interruption
+                        # TTSStoppedFrame will be sent by the interruption handler
+                    elif msg_type == "Warning":
+                        logger.warning(
+                            f"{self} warning: {msg.get('description', 'Unknown warning')}"
+                        )
+                    else:
+                        logger.debug(f"Received unknown message type: {msg}")
+                except json.JSONDecodeError:
+                    logger.error(f"Invalid JSON message: {message}")
+
+    async def flush_audio(self):
+        """Flush any pending audio synthesis by sending Flush command.
+
+        This should be called when the LLM finishes a complete response to force
+        generation of audio from Deepgram's internal text buffer.
+        """
+        if self._websocket:
+            try:
+                flush_msg = {"type": "Flush"}
+                await self._websocket.send(json.dumps(flush_msg))
+            except Exception as e:
+                logger.error(f"{self} error sending Flush message: {e}")
+
    @traced_tts
    async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
-        """Generate speech from text using Deepgram's TTS API.
+        """Generate speech from text using Deepgram's WebSocket TTS API.

        Args:
            text: The text to synthesize into speech.
@@ -91,33 +279,27 @@ class DeepgramTTSService(TTSService):
        """
        logger.debug(f"{self}: Generating TTS [{text}]")

-        options = SpeakOptions(
-            model=self._voice_id,
-            encoding=self._settings["encoding"],
-            sample_rate=self.sample_rate,
-            container="none",
-        )
-
        try:
+            # Reconnect if the websocket is closed
+            if not self._websocket or self._websocket.state is State.CLOSED:
+                await self._connect()
+
            await self.start_ttfb_metrics()
-
-            response = await self._deepgram_client.speak.asyncrest.v("1").stream_raw(
-                {"text": text}, options
-            )
-
            await self.start_tts_usage_metrics(text)
+
            yield TTSStartedFrame()

-            async for data in response.aiter_bytes():
-                await self.stop_ttfb_metrics()
-                if data:
-                    yield TTSAudioRawFrame(audio=data, sample_rate=self.sample_rate, num_channels=1)
+            # Send text message to Deepgram
+            # Note: We don't send Flush here - that should only be sent when the
+            # LLM finishes a complete response via flush_audio()
+            speak_msg = {"type": "Speak", "text": text}
+            await self._get_websocket().send(json.dumps(speak_msg))

-            yield TTSStoppedFrame()
+            # The audio frames will be handled in _receive_messages
+            yield None

        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            yield ErrorFrame(error=f"{self} error: {e}")
+            yield ErrorFrame(error=f"Unknown error occurred: {e}")


 class DeepgramHttpTTSService(TTSService):
@@ -227,5 +409,4 @@ class DeepgramHttpTTSService(TTSService):
            yield TTSStoppedFrame()

        except Exception as e:
-            logger.exception(f"{self} exception: {e}")
            yield ErrorFrame(f"Error getting audio: {str(e)}")
--- a/src/pipecat/services/elevenlabs/stt.py
+++ b/src/pipecat/services/elevenlabs/stt.py
@@ -351,8 +351,7 @@ class ElevenLabsSTTService(SegmentedSTTService):
                )

        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            yield ErrorFrame(error=f"{self} error: {e}")
+            yield ErrorFrame(error=f"Unknown error occurred: {e}")


 def audio_format_from_sample_rate(sample_rate: int) -> str:
@@ -598,7 +597,6 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
                }
                await self._websocket.send(json.dumps(message))
            except Exception as e:
-                logger.error(f"Error sending audio: {e}")
                yield ErrorFrame(f"ElevenLabs Realtime STT error: {str(e)}")

        yield None
@@ -663,8 +661,9 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
            await self._call_event_handler("on_connected")
            logger.debug("Connected to ElevenLabs Realtime STT")
        except Exception as e:
-            logger.error(f"{self}: unable to connect to ElevenLabs Realtime STT: {e}")
-            await self.push_error(ErrorFrame(f"Connection error: {str(e)}"))
+            await self.push_error(
+                error_msg=f"Unable to connect to ElevenLabs Realtime STT: {e}", exception=e
+            )

    async def _disconnect_websocket(self):
        """Disconnect from ElevenLabs Realtime STT WebSocket."""
@@ -673,7 +672,7 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
                logger.debug("Disconnecting from ElevenLabs Realtime STT")
                await self._websocket.close()
        except Exception as e:
-            logger.error(f"{self} error closing websocket: {e}")
+            await self.push_error(error_msg=f"Error closing websocket: {e}", exception=e)
        finally:
            self._websocket = None
            await self._call_event_handler("on_disconnected")
@@ -733,17 +732,17 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
        elif message_type == "error":
            error_msg = data.get("error", "Unknown error")
            logger.error(f"ElevenLabs error: {error_msg}")
-            await self.push_error(ErrorFrame(f"Error: {error_msg}"))
+            await self.push_error(error_msg=f"Error: {error_msg}")

        elif message_type == "auth_error":
            error_msg = data.get("error", "Authentication error")
            logger.error(f"ElevenLabs auth error: {error_msg}")
-            await self.push_error(ErrorFrame(f"Auth error: {error_msg}"))
+            await self.push_error(error_msg=f"Auth error: {error_msg}")

        elif message_type == "quota_exceeded_error":
            error_msg = data.get("error", "Quota exceeded")
            logger.error(f"ElevenLabs quota exceeded: {error_msg}")
-            await self.push_error(ErrorFrame(f"Quota exceeded: {error_msg}"))
+            await self.push_error(error_msg=f"Quota exceeded: {error_msg}")

        else:
            logger.debug(f"Unknown message type: {message_type}")
--- a/src/pipecat/services/elevenlabs/tts.py
+++ b/src/pipecat/services/elevenlabs/tts.py
@@ -160,7 +160,7 @@ def build_elevenlabs_voice_settings(
 class PronunciationDictionaryLocator(BaseModel):
    """Locator for a pronunciation dictionary.

-    Attributes:
+    Parameters:
        pronunciation_dictionary_id: The ID of the pronunciation dictionary.
        version_id: The version ID of the pronunciation dictionary.
    """
@@ -424,8 +424,7 @@ class ElevenLabsTTSService(AudioContextWordTTSService):
                        json.dumps({"context_id": self._context_id, "close_context": True})
                    )
            except Exception as e:
-                logger.error(f"{self} exception: {e}")
-                await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+                await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
            self._context_id = None
            self._started = False

@@ -536,9 +535,8 @@ class ElevenLabsTTSService(AudioContextWordTTSService):

            await self._call_event_handler("on_connected")
        except Exception as e:
-            logger.error(f"{self} exception: {e}")
            self._websocket = None
-            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
            await self._call_event_handler("on_connection_error", f"{e}")

    async def _disconnect_websocket(self):
@@ -553,8 +551,7 @@ class ElevenLabsTTSService(AudioContextWordTTSService):
                await self._websocket.close()
                logger.debug("Disconnected from ElevenLabs")
        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
        finally:
            self._started = False
            self._context_id = None
@@ -584,8 +581,7 @@ class ElevenLabsTTSService(AudioContextWordTTSService):
                    json.dumps({"context_id": self._context_id, "close_context": True})
                )
            except Exception as e:
-                logger.error(f"{self} exception: {e}")
-                await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+                await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
            self._context_id = None
            self._started = False
            self._partial_word = ""
@@ -735,20 +731,16 @@ class ElevenLabsTTSService(AudioContextWordTTSService):
                    await self._websocket.send(json.dumps(msg))
                    logger.trace(f"Created new context {self._context_id}")

-                    await self._send_text(text)
-                    await self.start_tts_usage_metrics(text)
-                else:
-                    await self._send_text(text)
+                await self._send_text(text)
+                await self.start_tts_usage_metrics(text)
            except Exception as e:
-                logger.error(f"{self} exception: {e}")
                yield TTSStoppedFrame()
-                yield ErrorFrame(error=f"{self} error: {e}")
+                yield ErrorFrame(error=f"Unknown error occurred: {e}")
                self._started = False
                return
            yield None
        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            yield ErrorFrame(error=f"{self} error: {e}")
+            yield ErrorFrame(error=f"Unknown error occurred: {e}")


 class ElevenLabsHttpTTSService(WordTTSService):
@@ -1043,7 +1035,6 @@ class ElevenLabsHttpTTSService(WordTTSService):
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
-                    logger.error(f"{self} error: {error_text}")
                    yield ErrorFrame(error=f"ElevenLabs API error: {error_text}")
                    return

@@ -1091,8 +1082,7 @@ class ElevenLabsHttpTTSService(WordTTSService):
                        logger.warning(f"Failed to parse JSON from stream: {e}")
                        continue
                    except Exception as e:
-                        logger.error(f"{self} exception: {e}")
-                        yield ErrorFrame(error=f"{self} error: {e}")
+                        yield ErrorFrame(error=f"Unknown error occurred: {e}")
                        continue

                # After processing all chunks, emit any remaining partial word
@@ -1116,8 +1106,7 @@ class ElevenLabsHttpTTSService(WordTTSService):
                    self._previous_text = text

        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            yield ErrorFrame(error=f"{self} error: {e}")
+            yield ErrorFrame(error=f"Unknown error occurred: {e}")
        finally:
            await self.stop_ttfb_metrics()
            # Let the parent class handle TTSStoppedFrame
--- a/src/pipecat/services/fal/image.py
+++ b/src/pipecat/services/fal/image.py
@@ -110,7 +110,6 @@ class FalImageGenService(ImageGenService):
        image_url = response["images"][0]["url"] if response else None

        if not image_url:
-            logger.error(f"{self} error: image generation failed")
            yield ErrorFrame("Image generation failed")
            return

--- a/src/pipecat/services/fal/stt.py
+++ b/src/pipecat/services/fal/stt.py
@@ -290,5 +290,4 @@ class FalSTTService(SegmentedSTTService):
                    )

        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            yield ErrorFrame(error=f"{self} error: {e}")
+            yield ErrorFrame(error=f"Unknown error occurred: {e}")
--- a/src/pipecat/services/fish/tts.py
+++ b/src/pipecat/services/fish/tts.py
@@ -76,7 +76,7 @@ class FishAudioTTSService(InterruptibleTTSService):
        api_key: str,
        reference_id: Optional[str] = None,  # This is the voice ID
        model: Optional[str] = None,  # Deprecated
-        model_id: str = "speech-1.5",
+        model_id: str = "s1",
        output_format: FishAudioOutputFormat = "pcm",
        sample_rate: Optional[int] = None,
        params: Optional[InputParams] = None,
@@ -93,7 +93,7 @@ class FishAudioTTSService(InterruptibleTTSService):
                The `model` parameter is deprecated and will be removed in version 0.1.0.
                Use `reference_id` instead to specify the voice model.

-            model_id: Specify which Fish Audio TTS model to use (e.g. "speech-1.5")
+            model_id: Specify which Fish Audio TTS model to use (e.g. "s1")
            output_format: Audio output format. Defaults to "pcm".
            sample_rate: Audio sample rate. If None, uses default.
            params: Additional input parameters for voice customization.
@@ -228,8 +228,7 @@ class FishAudioTTSService(InterruptibleTTSService):

            await self._call_event_handler("on_connected")
        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
            self._websocket = None
            await self._call_event_handler("on_connection_error", f"{e}")

@@ -243,8 +242,7 @@ class FishAudioTTSService(InterruptibleTTSService):
                await self._websocket.send(ormsgpack.packb(stop_message))
                await self._websocket.close()
        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
        finally:
            self._request_id = None
            self._started = False
@@ -286,8 +284,7 @@ class FishAudioTTSService(InterruptibleTTSService):
                                continue

            except Exception as e:
-                logger.error(f"{self} exception: {e}")
-                await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+                await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)

    @traced_tts
    async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
@@ -323,8 +320,7 @@ class FishAudioTTSService(InterruptibleTTSService):
                flush_message = {"event": "flush"}
                await self._get_websocket().send(ormsgpack.packb(flush_message))
            except Exception as e:
-                logger.error(f"{self} exception: {e}")
-                yield ErrorFrame(error=f"{self} error: {e}")
+                yield ErrorFrame(error=f"Unknown error occurred: {e}")
                yield TTSStoppedFrame()
                await self._disconnect()
                await self._connect()
@@ -332,5 +328,4 @@ class FishAudioTTSService(InterruptibleTTSService):
            yield None

        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            yield ErrorFrame(error=f"{self} error: {e}")
+            yield ErrorFrame(error=f"Unknown error occurred: {e}")
--- a/src/pipecat/services/gladia/stt.py
+++ b/src/pipecat/services/gladia/stt.py
@@ -468,8 +468,7 @@ class GladiaSTTService(STTService):
                            break

            except Exception as e:
-                logger.error(f"{self} exception: {e}")
-                await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+                await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
                self._connection_active = False

                if not self._should_reconnect:
@@ -559,8 +558,7 @@ class GladiaSTTService(STTService):
        except websockets.exceptions.ConnectionClosed:
            logger.debug("Connection closed during keepalive")
        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)

    async def _receive_task_handler(self):
        try:
@@ -623,8 +621,7 @@ class GladiaSTTService(STTService):
            # Expected when closing the connection
            pass
        except Exception as e:
-            logger.error(f"{self} exception: {e}")
-            await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
+            await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)

    async def _maybe_reconnect(self) -> bool:
        """Handle exponential backoff reconnection logic."""
@@ -632,7 +629,9 @@ class GladiaSTTService(STTService):
            return False
        self._reconnection_attempts += 1
        if self._reconnection_attempts > self._max_reconnection_attempts:
-            logger.error(f"Max reconnection attempts ({self._max_reconnection_attempts}) reached")
+            await self.push_error(
+                error_msg=f"Max reconnection attempts ({self._max_reconnection_attempts}) reached",
+            )
            self._should_reconnect = False
            return False
        delay = self._reconnection_delay * (2 ** (self._reconnection_attempts - 1))
--- a/src/pipecat/services/google/gemini_live/llm.py
+++ b/src/pipecat/services/google/gemini_live/llm.py
@@ -68,6 +68,7 @@ from pipecat.processors.aggregators.openai_llm_context import (
 )
 from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.google.frames import LLMSearchOrigin, LLMSearchResponseFrame, LLMSearchResult
+from pipecat.services.google.utils import update_google_client_http_options
 from pipecat.services.llm_service import FunctionCallFromLLM, LLMService
 from pipecat.services.openai.llm import (
    OpenAIAssistantContextAggregator,
@@ -681,7 +682,7 @@ class GeminiLiveLLMService(LLMService):
        self._video_input_paused = start_video_paused
        self._context = None
        self._api_key = api_key
-        self._http_options = http_options
+        self._http_options = update_google_client_http_options(http_options)
        self._session: AsyncSession = None
        self._connection_task = None

@@ -1175,7 +1176,7 @@ class GeminiLiveLLMService(LLMService):
            self._connection_task = self.create_task(self._connection_task_handler(config=config))

        except Exception as e:
-            await self.push_error(ErrorFrame(error=f"{self} Initialization error: {e}"))
+            await self.push_error(error_msg=f"Initialization error: {e}", exception=e)

    async def _connection_task_handler(self, config: LiveConnectConfig):
        async with self._client.aio.live.connect(model=self._model_name, config=config) as session:
@@ -1252,11 +1253,11 @@ class GeminiLiveLLMService(LLMService):
        )

        if self._consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
-            logger.error(
+            error_msg = (
                f"Max consecutive failures ({MAX_CONSECUTIVE_FAILURES}) reached, "
                "treating as fatal error"
            )
-            await self.push_error(ErrorFrame(error=f"{self} Error in receive loop: {error}"))
+            await self.push_error(error_msg=error_msg, exception=error)
            return False
        else:
            logger.info(
@@ -1284,7 +1285,7 @@ class GeminiLiveLLMService(LLMService):
            self._completed_tool_calls = set()
            self._disconnecting = False
        except Exception as e:
-            logger.error(f"{self} error disconnecting: {e}")
+            await self.push_error(error_msg=f"Error disconnecting: {e}", exception=e)

    async def _send_user_audio(self, frame):
        """Send user audio frame to Gemini Live API."""
@@ -1723,6 +1724,8 @@ class GeminiLiveLLMService(LLMService):
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=total_tokens,
+            cache_read_input_tokens=usage.cached_content_token_count,
+            reasoning_tokens=usage.thoughts_token_count,
        )

        await self.start_llm_usage_metrics(tokens)
@@ -1743,7 +1746,7 @@ class GeminiLiveLLMService(LLMService):
        # state management, and that exponential backoff for retries can have
        # cost/stability implications for a service cluster, let's just treat a
        # send-side error as fatal.
-        await self.push_error(ErrorFrame(error=f"{self} Send error: {error}", fatal=True))
+        await self.push_error(error_msg=f"Send error: {error}")

    def create_context_aggregator(
        self,
--- a/src/pipecat/services/google/gemini_live/llm_vertex.py
+++ b/src/pipecat/services/google/gemini_live/llm_vertex.py
@@ -126,6 +126,7 @@ class GeminiLiveVertexLLMService(GeminiLiveLLMService):
            credentials=self._credentials,
            project=self._project_id,
            location=self._location,
+            http_options=self._http_options,
        )

    @property
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1,2 @@`
				- Added `RimeNonJsonTTSService` which supports non-JSON streaming mode. This new class supports websocket streaming for the Arcana model.
				`@@ -0,0 +1 @@`
				- Added `on_conversation_detected` event to `VoicemailDetector`.
				`@@ -0,0 +1 @@`
				- Added `x-goog-api-client` header with Pipecat's version to all Google services' requests.
				`@@ -0,0 +1 @@`
				- Made `"amazon.nova-2-sonic-v1:0"` the new default model for `AWSNovaSonicLLMService`.
				`@@ -0,0 +1 @@`
				- `FalSmartTurnAnalyzer` and `LocalSmartTurnAnalyzer` are deprecated and will be removed in a future version. Use `LocalSmartTurnAnalyzerV3` instead.