RTSP stream example

2025-10-09 13:54:32 +08:00
70 changed files with 4803 additions and 6211 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,28 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Added

- The runner `--folder` argument now supports downloading files from
-  subdirectories.
-
-### Fixed
-
- Fixed an issue where `RimeHttpTTSService` and `PiperTTSService` could generate
-  incorrectly 16-bit aligned audio frames, potentially leading to internal
-  errors or static audio.
-
-## [0.0.90] - 2025-10-10
-
-### Added
-
- Added audio filter `KrispVivaFilter` using the Krisp VIVA SDK.
-
- Added `--folder` argument to the runner, allowing files saved in that folder
-  to be downloaded from `http://HOST:PORT/file/FILE`.
-
- Added `GeminiLiveVertexLLMService`, for accessing Gemini Live via Google
-  Vertex AI.
-
- Added some new configuration options to `GeminiLiveLLMService`:
+- Added some new configuration options to `GeminiMultimodalLiveLLMService`:

  - `thinking`
  - `enable_affective_dialog`
@@ -55,33 +34,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Changed

- Updated `GeminiLiveLLMService` to use the `google-genai` library rather than
-  use WebSockets directly.
-
-### Deprecated
-
- `LivekitFrameSerializer` is now deprecated. Use `LiveKitTransport` instead.
-
- `pipecat.service.openai_realtime` is now deprecated, use
-  `pipecat.services.openai.realtime` instead or
-  `pipecat.services.azure.realtime` for Azure Realtime.
-
- `pipecat.service.aws_nova_sonic` is now deprecated, use
-  `pipecat.services.aws.nova_sonic` instead.
-
- `GeminiMultimodalLiveLLMService` is now deprecated, use
-  `GeminiLiveLLMService`.
+- Updated `GeminiMultimodalLiveLLMService` to use the `google-genai` library
+  rather than use WebSockets directly.

 ### Fixed

- Fixed a `GoogleVertexLLMService` issue that would generate an error if no
-  token information was returned.
+- `GeminiMultimodalLiveLLMService` will now end gracefully (i.e. after the bot
+  has finished) upon receiving an `EndFrame`.

- `GeminiLiveLLMService` will now end gracefully (i.e. after the bot has
-  finished) upon receiving an `EndFrame`.
-
- `GeminiLiveLLMService` will try to seamlessly reconnect when it loses its
-  connection.
+- `GeminiMultimodalLiveLLMService` will try to seamlessly reconnect when it
+  loses its connection.

 ## [0.0.89] - 2025-10-07

@@ -1500,7 +1462,7 @@ quality and critical bugs impacting `ParallelPipelines` functionality.**
 - Added `session_token` parameter to `AWSNovaSonicLLMService`.

 - Added Gemini Multimodal Live File API for uploading, fetching, listing, and
-  deleting files. See `26f-gemini-live-files-api.py` for example usage.
+  deleting files. See `26f-gemini-multimodal-live-files-api.py` for example usage.

 ### Changed

@@ -3506,7 +3468,7 @@ stt = DeepgramSTTService(..., live_options=LiveOptions(model="nova-2-general"))
 - Added the new modalities option and helper function to set Gemini output
  modalities.

- Added `examples/foundational/26d-gemini-live-text.py` which is
+- Added `examples/foundational/26d-gemini-multimodal-live-text.py` which is
  using Gemini as TEXT modality and using another TTS provider for TTS process.

 ### Changed
@@ -3693,9 +3655,9 @@ stt = DeepgramSTTService(..., live_options=LiveOptions(model="nova-2-general"))
 - Added new foundational examples for `GeminiMultimodalLiveLLMService`:

  - `26-gemini-multimodal-live.py`
-  - `26a-gemini-live-transcription.py`
-  - `26b-gemini-live-video.py`
-  - `26c-gemini-live-video.py`
+  - `26a-gemini-multimodal-live-transcription.py`
+  - `26b-gemini-multimodal-live-video.py`
+  - `26c-gemini-multimodal-live-video.py`

 - Added `SimliVideoService`. This is an integration for Simli AI avatars.
  (see https://www.simli.com)
--- a/README.md
+++ b/README.md
@@ -3,7 +3,6 @@
 </div></h1>

 [![PyPI](https://img.shields.io/pypi/v/pipecat-ai)](https://pypi.org/project/pipecat-ai) ![Tests](https://github.com/pipecat-ai/pipecat/actions/workflows/tests.yaml/badge.svg) [![codecov](https://codecov.io/gh/pipecat-ai/pipecat/graph/badge.svg?token=LNVUIVO4Y9)](https://codecov.io/gh/pipecat-ai/pipecat) [![Docs](https://img.shields.io/badge/Documentation-blue)](https://docs.pipecat.ai) [![Discord](https://img.shields.io/discord/1239284677165056021)](https://discord.gg/pipecat) [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/pipecat-ai/pipecat)
-[![](https://getmanta.ai/api/badges?text=Manta%20Graph&link=manta)](https://getmanta.ai/pipecat)

 # 🎙️ Pipecat: Real-Time Voice & Multimodal AI Agents

--- a/docs/api/conf.py
+++ b/docs/api/conf.py
@@ -50,7 +50,6 @@ autodoc_mock_imports = [
    # Krisp - has build issues on some platforms
    "pipecat_ai_krisp",
    "krisp",
-    "krisp_audio",
    # System-specific GUI libraries
    "_tkinter",
    "tkinter",
--- a/env.example
+++ b/env.example
@@ -90,9 +90,6 @@ SIMLI_FACE_ID=...
 # Krisp
 KRISP_MODEL_PATH=...

-# Krisp Viva
-KRISP_VIVA_MODEL_PATH=...
-
 # DeepSeek
 DEEPSEEK_API_KEY=...

--- a/examples/foundational/07p-interruptible-krisp-viva.py
+++ b/examples/foundational/07p-interruptible-krisp-viva.py
@@ -1,129 +0,0 @@
-#
-# Copyright (c) 2024–2025, Daily
-#
-# SPDX-License-Identifier: BSD 2-Clause License
-#
-
-
-import os
-
-from dotenv import load_dotenv
-from loguru import logger
-
-from pipecat.audio.filters.krisp_viva_filter import KrispVivaFilter
-from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
-from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
-from pipecat.audio.vad.silero import SileroVADAnalyzer
-from pipecat.audio.vad.vad_analyzer import VADParams
-from pipecat.frames.frames import LLMRunFrame
-from pipecat.pipeline.pipeline import Pipeline
-from pipecat.pipeline.runner import PipelineRunner
-from pipecat.pipeline.task import PipelineParams, PipelineTask
-from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
-from pipecat.runner.types import RunnerArguments
-from pipecat.runner.utils import create_transport
-from pipecat.services.deepgram.stt import DeepgramSTTService
-from pipecat.services.deepgram.tts import DeepgramTTSService
-from pipecat.services.openai.llm import OpenAILLMService
-from pipecat.transports.base_transport import BaseTransport, TransportParams
-from pipecat.transports.daily.transport import DailyParams
-from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
-
-load_dotenv(override=True)
-
-# We store functions so objects (e.g. SileroVADAnalyzer) don't get
-# instantiated. The function will be called when the desired transport gets
-# selected.
-transport_params = {
-    "daily": lambda: DailyParams(
-        audio_in_enabled=True,
-        audio_out_enabled=True,
-        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
-        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
-        audio_in_filter=KrispVivaFilter(),
-    ),
-    "twilio": lambda: FastAPIWebsocketParams(
-        audio_in_enabled=True,
-        audio_out_enabled=True,
-        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
-        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
-        audio_in_filter=KrispVivaFilter(),
-    ),
-    "webrtc": lambda: TransportParams(
-        audio_in_enabled=True,
-        audio_out_enabled=True,
-        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
-        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
-        audio_in_filter=KrispVivaFilter(),
-    ),
-}
-
-
-async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
-    logger.info(f"Starting bot")
-
-    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
-
-    tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY"), voice="aura-helios-en")
-
-    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
-
-    messages = [
-        {
-            "role": "system",
-            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
-        },
-    ]
-
-    context = LLMContext(messages)
-    context_aggregator = LLMContextAggregatorPair(context)
-
-    pipeline = Pipeline(
-        [
-            transport.input(),  # Transport user input
-            stt,  # STT
-            context_aggregator.user(),  # User responses
-            llm,  # LLM
-            tts,  # TTS
-            transport.output(),  # Transport bot output
-            context_aggregator.assistant(),  # Assistant spoken responses
-        ]
-    )
-
-    task = PipelineTask(
-        pipeline,
-        params=PipelineParams(
-            enable_metrics=True,
-            enable_usage_metrics=True,
-        ),
-        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
-    )
-
-    @transport.event_handler("on_client_connected")
-    async def on_client_connected(transport, client):
-        logger.info(f"Client connected")
-        # Kick off the conversation.
-        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
-        await task.queue_frames([LLMRunFrame()])
-
-    @transport.event_handler("on_client_disconnected")
-    async def on_client_disconnected(transport, client):
-        logger.info(f"Client disconnected")
-        await task.cancel()
-
-    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
-
-    await runner.run(task)
-
-
-async def bot(runner_args: RunnerArguments):
-    """Main bot entry point compatible with Pipecat Cloud."""
-    transport = await create_transport(runner_args, transport_params)
-    await run_bot(transport, runner_args)
-
-
-if __name__ == "__main__":
-    from pipecat.runner.run import main
-
-    main()
--- a/examples/foundational/14p-function-calling-gemini-vertex-ai.py
+++ b/examples/foundational/14p-function-calling-gemini-vertex-ai.py
@@ -76,8 +76,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    llm = GoogleVertexLLMService(
        credentials=os.getenv("GOOGLE_VERTEX_TEST_CREDENTIALS"),
-        project_id=os.getenv("GOOGLE_CLOUD_PROJECT_ID"),
-        location=os.getenv("GOOGLE_CLOUD_LOCATION"),
+        params=GoogleVertexLLMService.InputParams(
+            project_id=os.getenv("GOOGLE_CLOUD_PROJECT_ID"),
+        ),
    )
    # You can aslo register a function_name of None to get all functions
    # sent to the same callback with an additional function_name parameter.
--- a/examples/foundational/18-gstreamer-filesrc.py
+++ b/examples/foundational/18-gstreamer-filesrc.py
@@ -21,7 +21,7 @@ from pipecat.transports.daily.transport import DailyParams
 load_dotenv(override=True)

 parser = argparse.ArgumentParser(description="Pipecat Video Streaming Bot")
-parser.add_argument("-i", "--input", type=str, required=True, help="Input video file")
+parser.add_argument("-i", "--input", type=str, required=False, help="Input video file")
 args = parser.parse_args()

 # We store functions so objects (e.g. SileroVADAnalyzer) don't get
@@ -48,8 +48,9 @@ transport_params = {
 async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info(f"Starting bot with video input: {args.input}")

+    location = "rtsp://rtspstream:9bGdZ6NKfRXnMbFAg71al@zephyr.rtsp.stream/people"
    gst = GStreamerPipelineSource(
-        pipeline=f"filesrc location={args.input}",
+        pipeline=(f"rtspsrc location={location} ! decodebin ! autovideosink"),
        out_params=GStreamerPipelineSource.OutputParams(
            video_width=1280,
            video_height=720,
--- a/examples/foundational/18-openai-realtime-usage.py
+++ b/examples/foundational/18-openai-realtime-usage.py
@@ -1,156 +0,0 @@
-#
-# Copyright (c) 2024–2025, Daily
-#
-# SPDX-License-Identifier: BSD 2-Clause License
-#
-
-"""Example: Print OpenAI Realtime API Token Usage Statistics
-
-This example demonstrates how to access and print token usage statistics
-from the OpenAI Realtime API, including detailed breakdowns of input/output
-tokens, cached tokens, and audio/text token usage.
-"""
-
-import os
-
-from dotenv import load_dotenv
-from loguru import logger
-
-from pipecat.audio.vad.silero import SileroVADAnalyzer
-from pipecat.audio.vad.vad_analyzer import VADParams
-from pipecat.pipeline.pipeline import Pipeline
-from pipecat.pipeline.runner import PipelineRunner
-from pipecat.pipeline.task import PipelineParams, PipelineTask
-from pipecat.runner.types import RunnerArguments
-from pipecat.runner.utils import create_transport
-from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService
-from pipecat.transports.base_transport import BaseTransport, TransportParams
-from pipecat.transports.daily.transport import DailyParams
-from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
-
-load_dotenv(override=True)
-
-# We store functions so objects don't get instantiated until the desired
-# transport gets selected.
-transport_params = {
-    "daily": lambda: DailyParams(
-        audio_in_enabled=True,
-        audio_out_enabled=True,
-        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
-    ),
-    "twilio": lambda: FastAPIWebsocketParams(
-        audio_in_enabled=True,
-        audio_out_enabled=True,
-        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
-    ),
-    "webrtc": lambda: TransportParams(
-        audio_in_enabled=True,
-        audio_out_enabled=True,
-        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
-    ),
-}
-
-
-async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
-    """Main function demonstrating usage statistics tracking."""
-    logger.info(f"Starting bot")
-
-    # Initialize the OpenAI Realtime service
-    llm = OpenAIRealtimeLLMService(
-        api_key=os.getenv("OPENAI_API_KEY") or "",
-        model="gpt-4o-realtime-preview-2024-12-17",
-    )
-
-    # To access usage statistics, we wrap the internal response handler
-    # This is the cleanest way to intercept usage data from the realtime API
-    original_handler = llm._handle_evt_response_done
-
-    async def custom_response_done_handler(evt):
-        """Custom handler that prints usage stats before calling original handler."""
-        # Print usage statistics if available
-        if evt.response.usage:
-            usage = evt.response.usage
-
-            logger.info("\n" + "=" * 50)
-            logger.info("📊 TOKEN USAGE STATISTICS")
-            logger.info("=" * 50)
-            logger.info(f"Total tokens: {usage.total_tokens}")
-            logger.info(f"Input tokens: {usage.input_tokens}")
-            logger.info(f"Output tokens: {usage.output_tokens}")
-
-            # Input token details
-            if usage.input_token_details:
-                logger.info(f"\n📥 Input token breakdown:")
-                logger.info(f"  • Cached tokens: {usage.input_token_details.cached_tokens}")
-                logger.info(f"  • Text tokens: {usage.input_token_details.text_tokens}")
-                logger.info(f"  • Audio tokens: {usage.input_token_details.audio_tokens}")
-
-                # Cached token details if available
-                if usage.input_token_details.cached_tokens_details:
-                    logger.info(
-                        f"  • Cached text tokens: {usage.input_token_details.cached_tokens_details.text_tokens}"
-                    )
-                    logger.info(
-                        f"  • Cached audio tokens: {usage.input_token_details.cached_tokens_details.audio_tokens}"
-                    )
-
-            # Output token details
-            if usage.output_token_details:
-                logger.info(f"\n📤 Output token breakdown:")
-                logger.info(f"  • Text tokens: {usage.output_token_details.text_tokens}")
-                logger.info(f"  • Audio tokens: {usage.output_token_details.audio_tokens}")
-
-            logger.info("=" * 50 + "\n")
-
-        # Call the original handler to maintain normal functionality
-        await original_handler(evt)
-
-    # Replace the handler with our custom one
-    llm._handle_evt_response_done = custom_response_done_handler
-
-    # Create pipeline
-    pipeline = Pipeline(
-        [
-            transport.input(),
-            llm,
-            transport.output(),
-        ]
-    )
-
-    # Create task
-    task = PipelineTask(
-        pipeline,
-        params=PipelineParams(
-            allow_interruptions=True,
-            enable_metrics=True,
-            enable_usage_metrics=True,
-        ),
-        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
-    )
-
-    @transport.event_handler("on_client_connected")
-    async def on_client_connected(transport, client):
-        logger.info("Client connected")
-        logger.info("🎤 Speak into your microphone to interact with the assistant")
-        logger.info("📊 Usage statistics will be printed after each response")
-
-    @transport.event_handler("on_client_disconnected")
-    async def on_client_disconnected(transport, client):
-        logger.info("Client disconnected")
-        await task.cancel()
-
-    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
-
-    await runner.run(task)
-
-
-async def bot(runner_args: RunnerArguments):
-    """Main bot entry point compatible with Pipecat Cloud."""
-    transport = await create_transport(runner_args, transport_params)
-    await run_bot(transport, runner_args)
-
-
-if __name__ == "__main__":
-    from pipecat.runner.run import main
-
-    main()
--- a/examples/foundational/19-openai-realtime.py
+++ b/examples/foundational/19-openai-realtime.py
@@ -24,15 +24,14 @@ from pipecat.processors.transcript_processor import TranscriptProcessor
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
 from pipecat.services.llm_service import FunctionCallParams
-from pipecat.services.openai.realtime.events import (
-    AudioConfiguration,
-    AudioInput,
+from pipecat.services.openai_realtime import (
    InputAudioNoiseReduction,
    InputAudioTranscription,
+    OpenAIRealtimeLLMService,
    SemanticTurnDetection,
    SessionProperties,
 )
-from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService
+from pipecat.services.openai_realtime.events import AudioConfiguration, AudioInput
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
--- a/examples/foundational/19a-azure-realtime.py
+++ b/examples/foundational/19a-azure-realtime.py
@@ -21,14 +21,13 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.azure.realtime.llm import AzureRealtimeLLMService
 from pipecat.services.llm_service import FunctionCallParams
-from pipecat.services.openai.realtime.events import (
-    AudioConfiguration,
-    AudioInput,
+from pipecat.services.openai_realtime import (
+    AzureRealtimeLLMService,
    InputAudioTranscription,
    SessionProperties,
 )
+from pipecat.services.openai_realtime.events import AudioConfiguration, AudioInput
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
--- a/examples/foundational/19b-openai-realtime-text.py
+++ b/examples/foundational/19b-openai-realtime-text.py
@@ -22,17 +22,16 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
 from pipecat.processors.transcript_processor import TranscriptProcessor
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.cartesia.tts import CartesiaTTSService
+from pipecat.services.cartesia import CartesiaTTSService
 from pipecat.services.llm_service import FunctionCallParams
-from pipecat.services.openai.realtime.events import (
-    AudioConfiguration,
-    AudioInput,
+from pipecat.services.openai_realtime import (
    InputAudioNoiseReduction,
    InputAudioTranscription,
+    OpenAIRealtimeLLMService,
    SemanticTurnDetection,
    SessionProperties,
 )
-from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService
+from pipecat.services.openai_realtime.events import AudioConfiguration, AudioInput
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
--- a/examples/foundational/20b-persistent-context-openai-realtime.py
+++ b/examples/foundational/20b-persistent-context-openai-realtime.py
@@ -25,14 +25,13 @@ from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
 from pipecat.services.deepgram.stt import DeepgramSTTService
 from pipecat.services.llm_service import FunctionCallParams
-from pipecat.services.openai.realtime.events import (
-    AudioConfiguration,
-    AudioInput,
+from pipecat.services.openai_realtime import (
    InputAudioTranscription,
+    OpenAIRealtimeLLMService,
    SessionProperties,
    TurnDetection,
 )
-from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService
+from pipecat.services.openai_realtime.events import AudioConfiguration, AudioInput
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
--- a/examples/foundational/20e-persistent-context-aws-nova-sonic.py
+++ b/examples/foundational/20e-persistent-context-aws-nova-sonic.py
@@ -23,7 +23,7 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.aws.nova_sonic.llm import AWSNovaSonicLLMService
+from pipecat.services.aws_nova_sonic.aws import AWSNovaSonicLLMService
 from pipecat.services.llm_service import FunctionCallParams
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
--- a/examples/foundational/26-gemini-multimodal-live.py
+++ b/examples/foundational/26-gemini-multimodal-live.py
@@ -17,7 +17,7 @@ from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
+from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
@@ -65,7 +65,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    Respond to what the user said in a creative and helpful way.
    """

-    llm = GeminiLiveLLMService(
+    llm = GeminiMultimodalLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        system_instruction=system_instruction,
        voice_id="Puck",  # Aoede, Charon, Fenrir, Kore, Puck
--- a/examples/foundational/26a-gemini-multimodal-live-transcription.py
+++ b/examples/foundational/26a-gemini-multimodal-live-transcription.py
@@ -20,7 +20,7 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
 from pipecat.processors.transcript_processor import TranscriptProcessor
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
+from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
@@ -65,7 +65,7 @@ transport_params = {
 async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info(f"Starting bot")

-    llm = GeminiLiveLLMService(
+    llm = GeminiMultimodalLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        voice_id="Aoede",  # Puck, Charon, Kore, Fenrir, Aoede
        # system_instruction="Talk like a pirate."
--- a/examples/foundational/26b-gemini-multimodal-live-function-calling.py
+++ b/examples/foundational/26b-gemini-multimodal-live-function-calling.py
@@ -22,7 +22,7 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
+from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService
 from pipecat.services.llm_service import FunctionCallParams
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
@@ -122,15 +122,12 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        required=["location"],
    )
    search_tool = {"google_search": {}}
-    # KNOWN ISSUE: If using GeminiVertexLiveLLMService, it appears
-    # you cannot use the "google_search" tool alongside other tools.
-    # See https://github.com/googleapis/python-genai/issues/941.
    tools = ToolsSchema(
        standard_tools=[weather_function, restaurant_function],
        custom_tools={AdapterType.GEMINI: [search_tool]},
    )

-    llm = GeminiLiveLLMService(
+    llm = GeminiMultimodalLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        system_instruction=system_instruction,
        tools=tools,
--- a/examples/foundational/26c-gemini-multimodal-live-video.py
+++ b/examples/foundational/26c-gemini-multimodal-live-video.py
@@ -24,7 +24,7 @@ from pipecat.runner.utils import (
    maybe_capture_participant_camera,
    maybe_capture_participant_screen,
 )
-from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
+from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams

@@ -58,7 +58,7 @@ transport_params = {


 async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
-    llm = GeminiLiveLLMService(
+    llm = GeminiMultimodalLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        voice_id="Aoede",  # Puck, Charon, Kore, Fenrir, Aoede
        # system_instruction="Talk like a pirate."
--- a/examples/foundational/26d-gemini-multimodal-live-text.py
+++ b/examples/foundational/26d-gemini-multimodal-live-text.py
@@ -20,9 +20,9 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
 from pipecat.services.cartesia.tts import CartesiaTTSService
-from pipecat.services.google.gemini_live.llm import (
-    GeminiLiveLLMService,
-    GeminiModalities,
+from pipecat.services.gemini_multimodal_live.gemini import (
+    GeminiMultimodalLiveLLMService,
+    GeminiMultimodalModalities,
    InputParams,
 )
 from pipecat.transports.base_transport import BaseTransport, TransportParams
@@ -80,15 +80,11 @@ transport_params = {
 async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info(f"Starting bot")

-    # KNOWN ISSUE: If using GeminiLiveVertexLLMService, you cannot specify a
-    # modality other than AUDIO (at least not if using the service's default
-    # model, which is a native audio model:
-    # https://cloud.google.com/vertex-ai/generative-ai/docs/live-api/tools#native-audio).
-    llm = GeminiLiveLLMService(
+    llm = GeminiMultimodalLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        system_instruction=SYSTEM_INSTRUCTION,
        tools=[{"google_search": {}}, {"code_execution": {}}],
-        params=InputParams(modalities=GeminiModalities.TEXT),
+        params=InputParams(modalities=GeminiMultimodalModalities.TEXT),
    )

    # Optionally, you can set the response modalities via a function
--- a/examples/foundational/26e-gemini-multimodal-google-search.py
+++ b/examples/foundational/26e-gemini-multimodal-google-search.py
@@ -19,7 +19,7 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
+from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
@@ -83,7 +83,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info(f"Starting bot")

    # Initialize the Gemini Multimodal Live model
-    llm = GeminiLiveLLMService(
+    llm = GeminiMultimodalLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        voice_id="Puck",  # Aoede, Charon, Fenrir, Kore, Puck
        system_instruction=system_instruction,
--- a/examples/foundational/26f-gemini-multimodal-live-files-api.py
+++ b/examples/foundational/26f-gemini-multimodal-live-files-api.py
@@ -19,7 +19,9 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
+from pipecat.services.gemini_multimodal_live.gemini import (
+    GeminiMultimodalLiveLLMService,
+)
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
@@ -108,7 +110,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """

    # Initialize Gemini service with File API support
-    llm = GeminiLiveLLMService(
+    llm = GeminiMultimodalLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        system_instruction=system_instruction,
        voice_id="Charon",  # Aoede, Charon, Fenrir, Kore, Puck
--- a/examples/foundational/26g-gemini-multimodal-live-groundingMetadata.py
+++ b/examples/foundational/26g-gemini-multimodal-live-groundingMetadata.py
@@ -9,13 +9,13 @@ from pipecat.audio.vad.vad_analyzer import VADParams
 from pipecat.frames.frames import Frame, LLMRunFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
-from pipecat.pipeline.task import PipelineTask
+from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
+from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService
 from pipecat.services.google.frames import LLMSearchResponseFrame
-from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
@@ -105,7 +105,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        custom_tools={AdapterType.GEMINI: [{"google_search": {}}, {"code_execution": {}}]},
    )

-    llm = GeminiLiveLLMService(
+    llm = GeminiMultimodalLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        system_instruction=SYSTEM_INSTRUCTION,
        voice_id="Charon",  # Aoede, Charon, Fenrir, Kore, Puck
--- a/examples/foundational/26h-gemini-live-vertex-function-calling.py
+++ b/examples/foundational/26h-gemini-live-vertex-function-calling.py
@@ -1,191 +0,0 @@
-#
-# Copyright (c) 2024–2025, Daily
-#
-# SPDX-License-Identifier: BSD 2-Clause License
-#
-
-
-import os
-from datetime import datetime
-
-from dotenv import load_dotenv
-from google.genai.types import HttpOptions
-from loguru import logger
-
-from pipecat.adapters.schemas.function_schema import FunctionSchema
-from pipecat.adapters.schemas.tools_schema import AdapterType, ToolsSchema
-from pipecat.audio.vad.silero import SileroVADAnalyzer
-from pipecat.audio.vad.vad_analyzer import VADParams
-from pipecat.frames.frames import LLMRunFrame
-from pipecat.pipeline.pipeline import Pipeline
-from pipecat.pipeline.runner import PipelineRunner
-from pipecat.pipeline.task import PipelineParams, PipelineTask
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
-from pipecat.runner.types import RunnerArguments
-from pipecat.runner.utils import create_transport
-from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
-from pipecat.services.google.gemini_live.llm_vertex import GeminiLiveVertexLLMService
-from pipecat.services.llm_service import FunctionCallParams
-from pipecat.transports.base_transport import BaseTransport, TransportParams
-from pipecat.transports.daily.transport import DailyParams
-from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
-
-load_dotenv(override=True)
-
-
-async def fetch_weather_from_api(params: FunctionCallParams):
-    temperature = 75 if params.arguments["format"] == "fahrenheit" else 24
-    await params.result_callback(
-        {
-            "conditions": "nice",
-            "temperature": temperature,
-            "format": params.arguments["format"],
-            "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"),
-        }
-    )
-
-
-async def fetch_restaurant_recommendation(params: FunctionCallParams):
-    await params.result_callback({"name": "The Golden Dragon"})
-
-
-system_instruction = """
-You are a helpful assistant who can answer questions and use tools.
-
-You have three tools available to you:
-1. get_current_weather: Use this tool to get the current weather in a specific location.
-2. get_restaurant_recommendation: Use this tool to get a restaurant recommendation in a specific location.
-"""
-
-
-# We store functions so objects (e.g. SileroVADAnalyzer) don't get
-# instantiated. The function will be called when the desired transport gets
-# selected.
-transport_params = {
-    "daily": lambda: DailyParams(
-        audio_in_enabled=True,
-        audio_out_enabled=True,
-        # set stop_secs to something roughly similar to the internal setting
-        # of the Multimodal Live api, just to align events. This doesn't really
-        # matter because we can only use the Multimodal Live API's phrase
-        # endpointing, for now.
-        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
-    ),
-    "twilio": lambda: FastAPIWebsocketParams(
-        audio_in_enabled=True,
-        audio_out_enabled=True,
-        # set stop_secs to something roughly similar to the internal setting
-        # of the Multimodal Live api, just to align events. This doesn't really
-        # matter because we can only use the Multimodal Live API's phrase
-        # endpointing, for now.
-        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
-    ),
-    "webrtc": lambda: TransportParams(
-        audio_in_enabled=True,
-        audio_out_enabled=True,
-        # set stop_secs to something roughly similar to the internal setting
-        # of the Multimodal Live api, just to align events. This doesn't really
-        # matter because we can only use the Multimodal Live API's phrase
-        # endpointing, for now.
-        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
-    ),
-}
-
-
-async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
-    logger.info(f"Starting bot")
-
-    weather_function = FunctionSchema(
-        name="get_current_weather",
-        description="Get the current weather",
-        properties={
-            "location": {
-                "type": "string",
-                "description": "The city and state, e.g. San Francisco, CA",
-            },
-            "format": {
-                "type": "string",
-                "enum": ["celsius", "fahrenheit"],
-                "description": "The temperature unit to use. Infer this from the user's location.",
-            },
-        },
-        required=["location", "format"],
-    )
-    restaurant_function = FunctionSchema(
-        name="get_restaurant_recommendation",
-        description="Get a restaurant recommendation",
-        properties={
-            "location": {
-                "type": "string",
-                "description": "The city and state, e.g. San Francisco, CA",
-            },
-        },
-        required=["location"],
-    )
-    # KNOWN ISSUE: If using GeminiVertexLiveLLMService, it appears
-    # you cannot use the "google_search" tool alongside other tools.
-    # See https://github.com/googleapis/python-genai/issues/941.
-    tools = ToolsSchema(standard_tools=[weather_function, restaurant_function])
-
-    llm = GeminiLiveVertexLLMService(
-        credentials=os.getenv("GOOGLE_VERTEX_TEST_CREDENTIALS"),
-        project_id=os.getenv("GOOGLE_CLOUD_PROJECT_ID"),
-        location=os.getenv("GOOGLE_CLOUD_LOCATION"),
-        system_instruction=system_instruction,
-        voice_id="Puck",  # Aoede, Charon, Fenrir, Kore, Puck
-        tools=tools,
-    )
-
-    llm.register_function("get_current_weather", fetch_weather_from_api)
-    llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation)
-
-    context = OpenAILLMContext(
-        [{"role": "user", "content": "Say hello."}],
-    )
-    context_aggregator = llm.create_context_aggregator(context)
-
-    pipeline = Pipeline(
-        [
-            transport.input(),
-            context_aggregator.user(),
-            llm,
-            transport.output(),
-            context_aggregator.assistant(),
-        ]
-    )
-
-    task = PipelineTask(
-        pipeline,
-        params=PipelineParams(
-            enable_metrics=True,
-            enable_usage_metrics=True,
-        ),
-        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
-    )
-
-    @transport.event_handler("on_client_connected")
-    async def on_client_connected(transport, client):
-        logger.info(f"Client connected")
-        # Kick off the conversation.
-        await task.queue_frames([LLMRunFrame()])
-
-    @transport.event_handler("on_client_disconnected")
-    async def on_client_disconnected(transport, client):
-        logger.info(f"Client disconnected")
-        await task.cancel()
-
-    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
-
-    await runner.run(task)
-
-
-async def bot(runner_args: RunnerArguments):
-    """Main bot entry point compatible with Pipecat Cloud."""
-    transport = await create_transport(runner_args, transport_params)
-    await run_bot(transport, runner_args)
-
-
-if __name__ == "__main__":
-    from pipecat.runner.run import main
-
-    main()
--- a/examples/foundational/26i-gemini-multimodal-live-graceful-end.py
+++ b/examples/foundational/26i-gemini-multimodal-live-graceful-end.py
@@ -4,6 +4,8 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #

+
+import asyncio
 import os
 from datetime import datetime

@@ -22,7 +24,7 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
 from pipecat.processors.frame_processor import FrameDirection
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
+from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService
 from pipecat.services.llm_service import FunctionCallParams
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
@@ -142,7 +144,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        custom_tools={AdapterType.GEMINI: [search_tool]},
    )

-    llm = GeminiLiveLLMService(
+    llm = GeminiMultimodalLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        system_instruction=system_instruction,
        tools=tools,
--- a/examples/foundational/40-aws-nova-sonic.py
+++ b/examples/foundational/40-aws-nova-sonic.py
@@ -21,7 +21,7 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.aws.nova_sonic.llm import AWSNovaSonicLLMService
+from pipecat.services.aws_nova_sonic import AWSNovaSonicLLMService
 from pipecat.services.llm_service import FunctionCallParams
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
--- a/examples/foundational/46-video-processing.py
+++ b/examples/foundational/46-video-processing.py
@@ -20,7 +20,7 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 from pipecat.processors.frameworks.rtvi import RTVIObserver, RTVIProcessor
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
+from pipecat.services.gemini_multimodal_live import GeminiMultimodalLiveLLMService
 from pipecat.transports.base_transport import TransportParams
 from pipecat.transports.daily.transport import DailyParams, DailyTransport

@@ -94,7 +94,7 @@ Respond to what the user said in a creative and helpful way. Keep your responses


 async def run_bot(pipecat_transport):
-    llm = GeminiLiveLLMService(
+    llm = GeminiMultimodalLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        voice_id="Puck",  # Aoede, Charon, Fenrir, Kore, Puck
        transcribe_user_audio=True,
--- a/examples/foundational/README.md
+++ b/examples/foundational/README.md
@@ -105,7 +105,7 @@ uv run 07-interruptible.py -t twilio -x NGROK_HOST_NAME
 ### Vision & Multimodal

 - **[12a-describe-video-gemini-flash.py](./12a-describe-video-gemini-flash.py)**: Bot describes user's video (Video input, Multimodal LLMs)
- **[26c-gemini-live-video.py](./26c-gemini-live-video.py)**: Gemini with video input (Streaming video, Function calls)
+- **[26c-gemini-multimodal-live-video.py](./26c-gemini-multimodal-live-video.py)**: Gemini with video input (Streaming video, Function calls)

 ### Voice & Language

--- a/scripts/evals/run-release-evals.py
+++ b/scripts/evals/run-release-evals.py
@@ -147,10 +147,7 @@ TESTS_15 = [
 ]

 TESTS_19 = [
-    ("19-openai-realtime.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
    ("19-openai-realtime-beta.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
-    # OpenAI Realtime not released on Azure yet
-    # ("19a-azure-realtime.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
    ("19a-azure-realtime-beta.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
    ("19b-openai-realtime-text.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
    ("19b-openai-realtime-beta-text.py", PROMPT_WEATHER, EVAL_WEATHER, BOT_SPEAKS_FIRST),
@@ -163,18 +160,18 @@ TESTS_21 = [
 TESTS_26 = [
    ("26-gemini-multimodal-live.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
    (
-        "26a-gemini-live-transcription.py",
+        "26a-gemini-multimodal-live-transcription.py",
        PROMPT_SIMPLE_MATH,
        EVAL_SIMPLE_MATH,
        BOT_SPEAKS_FIRST,
    ),
    (
-        "26b-gemini-live-function-calling.py",
+        "26b-gemini-multimodal-live-function-calling.py",
        PROMPT_WEATHER,
        EVAL_WEATHER,
        BOT_SPEAKS_FIRST,
    ),
-    ("26c-gemini-live-video.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
+    ("26c-gemini-multimodal-live-video.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
    (
        "26e-gemini-multimodal-google-search.py",
        PROMPT_ONLINE_SEARCH,
@@ -182,13 +179,7 @@ TESTS_26 = [
        BOT_SPEAKS_FIRST,
    ),
    # Currently not working.
-    # ("26d-gemini-live-text.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
-    (
-        "26h-gemini-live-vertex-function-calling.py",
-        PROMPT_WEATHER,
-        EVAL_WEATHER,
-        BOT_SPEAKS_FIRST,
-    ),
+    # ("26d-gemini-multimodal-live-text.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
 ]

 TESTS_27 = [
--- a/src/pipecat/adapters/services/gemini_adapter.py
+++ b/src/pipecat/adapters/services/gemini_adapter.py
@@ -87,11 +87,9 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
            Includes both converted standard tools and any custom Gemini-specific tools.
        """
        functions_schema = tools_schema.standard_tools
-        formatted_standard_tools = (
-            [{"function_declarations": [func.to_default_dict() for func in functions_schema]}]
-            if functions_schema
-            else []
-        )
+        formatted_standard_tools = [
+            {"function_declarations": [func.to_default_dict() for func in functions_schema]}
+        ]
        custom_gemini_tools = []
        if tools_schema.custom_tools:
            custom_gemini_tools = tools_schema.custom_tools.get(AdapterType.GEMINI, [])
--- a/src/pipecat/audio/filters/krisp_viva_filter.py
+++ b/src/pipecat/audio/filters/krisp_viva_filter.py
@@ -1,193 +0,0 @@
-#
-# Copyright (c) 2024–2025, Daily
-#
-# SPDX-License-Identifier: BSD 2-Clause License
-#
-
-"""Krisp noise reduction audio filter for Pipecat.
-
-This module provides an audio filter implementation using Krisp VIVA SDK.
-"""
-
-import os
-
-import numpy as np
-from loguru import logger
-
-from pipecat.audio.filters.base_audio_filter import BaseAudioFilter
-from pipecat.frames.frames import FilterControlFrame, FilterEnableFrame
-
-try:
-    import krisp_audio
-except ModuleNotFoundError as e:
-    logger.error(f"Exception: {e}")
-    logger.error("In order to use the Krisp filter, you need to install krisp_audio.")
-    raise Exception(f"Missing module: {e}")
-
-
-def _log_callback(log_message, log_level):
-    logger.info(f"[{log_level}] {log_message}")
-
-
-class KrispVivaFilter(BaseAudioFilter):
-    """Audio filter using the Krisp VIVA SDK.
-
-    Provides real-time noise reduction for audio streams using Krisp's
-    proprietary noise suppression algorithms. This filter requires a
-    valid Krisp model file to operate.
-
-    Supported sample rates:
-        - 8000 Hz
-        - 16000 Hz
-        - 24000 Hz
-        - 32000 Hz
-        - 44100 Hz
-        - 48000 Hz
-    """
-
-    # Initialize Krisp Audio SDK globally
-    krisp_audio.globalInit("", _log_callback, krisp_audio.LogLevel.Off)
-    SDK_VERSION = krisp_audio.getVersion()
-    logger.debug(
-        f"Krisp Audio Python SDK Version: {SDK_VERSION.major}."
-        f"{SDK_VERSION.minor}.{SDK_VERSION.patch}"
-    )
-
-    SAMPLE_RATES = {
-        8000: krisp_audio.SamplingRate.Sr8000Hz,
-        16000: krisp_audio.SamplingRate.Sr16000Hz,
-        24000: krisp_audio.SamplingRate.Sr24000Hz,
-        32000: krisp_audio.SamplingRate.Sr32000Hz,
-        44100: krisp_audio.SamplingRate.Sr44100Hz,
-        48000: krisp_audio.SamplingRate.Sr48000Hz,
-    }
-
-    FRAME_SIZE_MS = 10  # Krisp requires audio frames of 10ms duration for processing.
-
-    def __init__(self, model_path: str = None, noise_suppression_level: int = 100) -> None:
-        """Initialize the Krisp noise reduction filter.
-
-        Args:
-            model_path: Path to the Krisp model file (.kef extension).
-                If None, uses KRISP_VIVA_MODEL_PATH environment variable.
-            noise_suppression_level: Noise suppression level.
-
-        Raises:
-            ValueError: If model_path is not provided and KRISP_VIVA_MODEL_PATH is not set.
-            Exception: If model file doesn't have .kef extension.
-            FileNotFoundError: If model file doesn't exist.
-        """
-        super().__init__()
-
-        # Set model path, checking environment if not specified
-        self._model_path = model_path or os.getenv("KRISP_VIVA_MODEL_PATH")
-        if not self._model_path:
-            logger.error("Model path is not provided and KRISP_VIVA_MODEL_PATH is not set.")
-            raise ValueError("Model path for KrispAudioProcessor must be provided.")
-
-        if not self._model_path.endswith(".kef"):
-            raise Exception("Model is expected with .kef extension")
-
-        if not os.path.isfile(self._model_path):
-            raise FileNotFoundError(f"Model file not found: {self._model_path}")
-
-        self._filtering = True
-        self._session = None
-        self._samples_per_frame = None
-        self._noise_suppression_level = noise_suppression_level
-
-        # Audio buffer to accumulate samples for complete frames
-        self._audio_buffer = bytearray()
-
-    def _int_to_sample_rate(self, sample_rate):
-        """Convert integer sample rate to krisp_audio SamplingRate enum.
-
-        Args:
-            sample_rate: Sample rate as integer
-
-        Returns:
-            krisp_audio.SamplingRate enum value
-
-        Raises:
-            ValueError: If sample rate is not supported
-        """
-        if sample_rate not in self.SAMPLE_RATES:
-            raise ValueError("Unsupported sample rate")
-        return self.SAMPLE_RATES[sample_rate]
-
-    async def start(self, sample_rate: int):
-        """Initialize the Krisp processor with the transport's sample rate.
-
-        Args:
-            sample_rate: The sample rate of the input transport in Hz.
-        """
-        model_info = krisp_audio.ModelInfo()
-        model_info.path = self._model_path
-
-        nc_cfg = krisp_audio.NcSessionConfig()
-        nc_cfg.inputSampleRate = self._int_to_sample_rate(sample_rate)
-        nc_cfg.inputFrameDuration = krisp_audio.FrameDuration.Fd10ms
-        nc_cfg.outputSampleRate = nc_cfg.inputSampleRate
-        nc_cfg.modelInfo = model_info
-
-        self._samples_per_frame = int((sample_rate * self.FRAME_SIZE_MS) / 1000)
-        self._session = krisp_audio.NcInt16.create(nc_cfg)
-
-    async def stop(self):
-        """Clean up the Krisp processor when stopping."""
-        self._session = None
-
-    async def process_frame(self, frame: FilterControlFrame):
-        """Process control frames to enable/disable filtering.
-
-        Args:
-            frame: The control frame containing filter commands.
-        """
-        if isinstance(frame, FilterEnableFrame):
-            self._filtering = frame.enable
-
-    async def filter(self, audio: bytes) -> bytes:
-        """Apply Krisp noise reduction to audio data.
-
-        Args:
-            audio: Raw audio data as bytes to be filtered.
-
-        Returns:
-            Noise-reduced audio data as bytes.
-        """
-        if not self._filtering:
-            return audio
-
-        # Add incoming audio to our buffer
-        self._audio_buffer.extend(audio)
-
-        # Calculate how many complete frames we can process
-        total_samples = len(self._audio_buffer) // 2  # 2 bytes per int16 sample
-        num_complete_frames = total_samples // self._samples_per_frame
-
-        if num_complete_frames == 0:
-            # Not enough samples for a complete frame yet, return empty
-            return b""
-
-        # Calculate how many bytes we need for complete frames
-        complete_samples_count = num_complete_frames * self._samples_per_frame
-        bytes_to_process = complete_samples_count * 2  # 2 bytes per sample
-
-        # Extract the bytes we can process
-        audio_to_process = bytes(self._audio_buffer[:bytes_to_process])
-
-        # Remove processed bytes from buffer, keep the remainder
-        self._audio_buffer = self._audio_buffer[bytes_to_process:]
-
-        # Process the complete frames
-        samples = np.frombuffer(audio_to_process, dtype=np.int16)
-        frames = samples.reshape(-1, self._samples_per_frame)
-        processed_samples = np.empty_like(samples)
-
-        for i, frame in enumerate(frames):
-            cleaned_frame = self._session.process(frame, self._noise_suppression_level)
-            processed_samples[i * self._samples_per_frame : (i + 1) * self._samples_per_frame] = (
-                cleaned_frame
-            )
-
-        return processed_samples.tobytes()
--- a/src/pipecat/processors/frame_processor.py
+++ b/src/pipecat/processors/frame_processor.py
@@ -877,8 +877,6 @@ class FrameProcessor(BaseObject):

        """
        while True:
-            (frame, direction, callback) = await self.__input_queue.get()
-
            if self.__should_block_system_frames and self.__input_event:
                logger.trace(f"{self}: system frame processing paused")
                await self.__input_event.wait()
@@ -886,6 +884,8 @@ class FrameProcessor(BaseObject):
                self.__should_block_system_frames = False
                logger.trace(f"{self}: system frame processing resumed")

+            (frame, direction, callback) = await self.__input_queue.get()
+
            if isinstance(frame, SystemFrame):
                await self.__process_frame(frame, direction, callback)
            elif self.__process_queue:
@@ -900,8 +900,6 @@ class FrameProcessor(BaseObject):
    async def __process_frame_task_handler(self):
        """Handle non-system frames from the process queue."""
        while True:
-            (frame, direction, callback) = await self.__process_queue.get()
-
            if self.__should_block_frames and self.__process_event:
                logger.trace(f"{self}: frame processing paused")
                await self.__process_event.wait()
@@ -909,6 +907,8 @@ class FrameProcessor(BaseObject):
                self.__should_block_frames = False
                logger.trace(f"{self}: frame processing resumed")

+            (frame, direction, callback) = await self.__process_queue.get()
+
            await self.__process_frame(frame, direction, callback)

            self.__process_queue.task_done()
--- a/src/pipecat/runner/run.py
+++ b/src/pipecat/runner/run.py
@@ -67,15 +67,12 @@ To run locally:

 import argparse
 import asyncio
-import mimetypes
 import os
 import sys
 from contextlib import asynccontextmanager
-from pathlib import Path
 from typing import Optional

 import aiohttp
-from fastapi.responses import FileResponse
 from loguru import logger

 from pipecat.runner.types import (
@@ -101,12 +98,6 @@ except ImportError as e:
 load_dotenv(override=True)
 os.environ["ENV"] = "local"

-TELEPHONY_TRANSPORTS = ["twilio", "telnyx", "plivo", "exotel"]
-
-RUNNER_DOWNLOADS_FOLDER: Optional[str] = None
-RUNNER_HOST: str = "localhost"
-RUNNER_PORT: int = 7860
-

 def _get_bot_module():
    """Get the bot module from the calling script."""
@@ -161,12 +152,7 @@ async def _run_telephony_bot(websocket: WebSocket):


 def _create_server_app(
-    *,
-    transport_type: str,
-    host: str = "localhost",
-    proxy: str,
-    esp32_mode: bool = False,
-    folder: Optional[str] = None,
+    transport_type: str, host: str = "localhost", proxy: str = None, esp32_mode: bool = False
 ):
    """Create FastAPI app with transport-specific routes."""
    app = FastAPI()
@@ -181,21 +167,19 @@ def _create_server_app(

    # Set up transport-specific routes
    if transport_type == "webrtc":
-        _setup_webrtc_routes(app, esp32_mode=esp32_mode, host=host, folder=folder)
+        _setup_webrtc_routes(app, esp32_mode=esp32_mode, host=host)
        _setup_whatsapp_routes(app)
    elif transport_type == "daily":
        _setup_daily_routes(app)
-    elif transport_type in TELEPHONY_TRANSPORTS:
-        _setup_telephony_routes(app, transport_type=transport_type, proxy=proxy)
+    elif transport_type in ["twilio", "telnyx", "plivo", "exotel"]:
+        _setup_telephony_routes(app, transport_type, proxy)
    else:
        logger.warning(f"Unknown transport type: {transport_type}")

    return app


-def _setup_webrtc_routes(
-    app: FastAPI, *, esp32_mode: bool = False, host: str = "localhost", folder: Optional[str] = None
-):
+def _setup_webrtc_routes(app: FastAPI, esp32_mode: bool = False, host: str = "localhost"):
    """Set up WebRTC-specific routes."""
    try:
        from pipecat_ai_small_webrtc_prebuilt.frontend import SmallWebRTCPrebuiltUI
@@ -217,21 +201,6 @@ def _setup_webrtc_routes(
        """Redirect root requests to client interface."""
        return RedirectResponse(url="/client/")

-    @app.get("/files/{filename:path}")
-    async def download_file(filename: str):
-        """Handle file downloads."""
-        if not folder:
-            logger.warning(f"Attempting to dowload {filename}, but downloads folder not setup.")
-            return
-
-        file_path = Path(folder) / filename
-        if not os.path.exists(file_path):
-            raise HTTPException(404)
-
-        media_type, _ = mimetypes.guess_type(file_path)
-
-        return FileResponse(path=file_path, media_type=media_type, filename=filename)
-
    # Initialize the SmallWebRTC request handler
    small_webrtc_handler: SmallWebRTCRequestHandler = SmallWebRTCRequestHandler(
        esp32_mode=esp32_mode, host=host
@@ -520,7 +489,7 @@ def _setup_daily_routes(app: FastAPI):
        return await _handle_rtvi_request(request)


-def _setup_telephony_routes(app: FastAPI, *, transport_type: str, proxy: str):
+def _setup_telephony_routes(app: FastAPI, transport_type: str, proxy: str):
    """Set up telephony-specific routes."""
    # XML response templates (Exotel doesn't use XML webhooks)
    XML_TEMPLATES = {
@@ -623,21 +592,6 @@ def _validate_and_clean_proxy(proxy: str) -> str:
    return proxy


-def runner_downloads_folder() -> Optional[str]:
-    """Returns the folder where files are stored for later download."""
-    return RUNNER_DOWNLOADS_FOLDER
-
-
-def runner_host() -> str:
-    """Returns the host name of this runner."""
-    return RUNNER_HOST
-
-
-def runner_port() -> int:
-    """Returns the port of this runner."""
-    return RUNNER_PORT
-
-
 def main():
    """Start the Pipecat development runner.

@@ -658,16 +612,14 @@ def main():

    The bot file must contain a `bot(runner_args)` function as the entry point.
    """
-    global RUNNER_DOWNLOADS_FOLDER, RUNNER_HOST, RUNNER_PORT
-
    parser = argparse.ArgumentParser(description="Pipecat Development Runner")
-    parser.add_argument("--host", type=str, default=RUNNER_HOST, help="Host address")
-    parser.add_argument("--port", type=int, default=RUNNER_PORT, help="Port number")
+    parser.add_argument("--host", type=str, default="localhost", help="Host address")
+    parser.add_argument("--port", type=int, default=7860, help="Port number")
    parser.add_argument(
        "-t",
        "--transport",
        type=str,
-        choices=["daily", "webrtc", *TELEPHONY_TRANSPORTS],
+        choices=["daily", "webrtc", "twilio", "telnyx", "plivo", "exotel"],
        default="webrtc",
        help="Transport type",
    )
@@ -685,7 +637,6 @@ def main():
        default=False,
        help="Connect directly to Daily room (automatically sets transport to daily)",
    )
-    parser.add_argument("-f", "--folder", type=str, help="Path to downloads folder")
    parser.add_argument(
        "--verbose", "-v", action="count", default=0, help="Increase logging verbosity"
    )
@@ -708,10 +659,6 @@ def main():
        logger.error("For ESP32, you need to specify `--host IP` so we can do SDP munging.")
        return

-    if args.transport in TELEPHONY_TRANSPORTS and not args.proxy:
-        logger.error(f"For telephony transports, you need to specify `--proxy PROXY`.")
-        return
-
    # Log level
    logger.remove()
    logger.add(sys.stderr, level="TRACE" if args.verbose else "DEBUG")
@@ -742,18 +689,8 @@ def main():
        print(f"   → Open http://{args.host}:{args.port} in your browser to start a session")
        print()

-    RUNNER_DOWNLOADS_FOLDER = args.folder
-    RUNNER_HOST = args.host
-    RUNNER_PORT = args.port
-
    # Create the app with transport-specific setup
-    app = _create_server_app(
-        transport_type=args.transport,
-        host=args.host,
-        proxy=args.proxy,
-        esp32_mode=args.esp32,
-        folder=args.folder,
-    )
+    app = _create_server_app(args.transport, args.host, args.proxy, args.esp32)

    # Run the server
    uvicorn.run(app, host=args.host, port=args.port)
--- a/src/pipecat/serializers/livekit.py
+++ b/src/pipecat/serializers/livekit.py
@@ -25,31 +25,11 @@ except ModuleNotFoundError as e:
 class LivekitFrameSerializer(FrameSerializer):
    """Serializer for converting between Pipecat frames and LiveKit audio frames.

-    .. deprecated:: 0.0.90
-
-        This class is deprecated and will be removed in a future version.
-        Please use LiveKitTransport instead, which handles audio streaming
-        and frame conversion natively.
-
    This serializer handles the conversion of Pipecat's OutputAudioRawFrame objects
    to LiveKit AudioFrame objects for transmission, and the reverse conversion
    for received audio data.
    """

-    def __init__(self):
-        """Initialize the LiveKit frame serializer."""
-        super().__init__()
-        import warnings
-
-        with warnings.catch_warnings():
-            warnings.simplefilter("always")
-            warnings.warn(
-                "LivekitFrameSerializer is deprecated and will be removed in a future version. "
-                "Please use LiveKitTransport instead, which handles audio streaming natively.",
-                DeprecationWarning,
-                stacklevel=2,
-            )
-
    @property
    def type(self) -> FrameSerializerType:
        """Get the serializer type.
--- a/src/pipecat/services/ai_service.py
+++ b/src/pipecat/services/ai_service.py
@@ -97,7 +97,9 @@ class AIService(FrameProcessor):
        pass

    async def _update_settings(self, settings: Mapping[str, Any]):
-        from pipecat.services.openai.realtime.events import SessionProperties
+        from pipecat.services.openai_realtime_beta.events import (
+            SessionProperties,
+        )

        for key, value in settings.items():
            logger.debug("Update request for:", key, value)
@@ -109,7 +111,9 @@ class AIService(FrameProcessor):
                logger.debug("Attempting to update", key, value)

                try:
-                    from pipecat.services.openai.realtime.events import TurnDetection
+                    from pipecat.services.openai_realtime_beta.events import (
+                        TurnDetection,
+                    )

                    if isinstance(self._session_properties, SessionProperties):
                        current_properties = self._session_properties
--- a/src/pipecat/services/aws/init.py
+++ b/src/pipecat/services/aws/init.py
@@ -9,7 +9,6 @@ import sys
 from pipecat.services import DeprecatedModuleProxy

 from .llm import *
-from .nova_sonic import *
 from .stt import *
 from .tts import *

--- a/src/pipecat/services/aws/nova_sonic/init.py
+++ b/src/pipecat/services/aws/nova_sonic/init.py
--- a/src/pipecat/services/aws/nova_sonic/context.py
+++ b/src/pipecat/services/aws/nova_sonic/context.py
@@ -1,367 +0,0 @@
-#
-# Copyright (c) 2025, Daily
-#
-# SPDX-License-Identifier: BSD 2-Clause License
-#
-
-"""Context management for AWS Nova Sonic LLM service.
-
-This module provides specialized context aggregators and message handling for AWS Nova Sonic,
-including conversation history management and role-specific message processing.
-"""
-
-import copy
-from dataclasses import dataclass, field
-from enum import Enum
-
-from loguru import logger
-
-from pipecat.frames.frames import (
-    BotStoppedSpeakingFrame,
-    DataFrame,
-    Frame,
-    FunctionCallResultFrame,
-    InterruptionFrame,
-    LLMFullResponseEndFrame,
-    LLMFullResponseStartFrame,
-    LLMMessagesAppendFrame,
-    LLMMessagesUpdateFrame,
-    LLMSetToolChoiceFrame,
-    LLMSetToolsFrame,
-    TextFrame,
-    UserImageRawFrame,
-)
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
-from pipecat.processors.frame_processor import FrameDirection
-from pipecat.services.aws.nova_sonic.frames import AWSNovaSonicFunctionCallResultFrame
-from pipecat.services.openai.llm import (
-    OpenAIAssistantContextAggregator,
-    OpenAIUserContextAggregator,
-)
-
-
-class Role(Enum):
-    """Roles supported in AWS Nova Sonic conversations.
-
-    Parameters:
-        SYSTEM: System-level messages (not used in conversation history).
-        USER: Messages sent by the user.
-        ASSISTANT: Messages sent by the assistant.
-        TOOL: Messages sent by tools (not used in conversation history).
-    """
-
-    SYSTEM = "SYSTEM"
-    USER = "USER"
-    ASSISTANT = "ASSISTANT"
-    TOOL = "TOOL"
-
-
-@dataclass
-class AWSNovaSonicConversationHistoryMessage:
-    """A single message in AWS Nova Sonic conversation history.
-
-    Parameters:
-        role: The role of the message sender (USER or ASSISTANT only).
-        text: The text content of the message.
-    """
-
-    role: Role  # only USER and ASSISTANT
-    text: str
-
-
-@dataclass
-class AWSNovaSonicConversationHistory:
-    """Complete conversation history for AWS Nova Sonic initialization.
-
-    Parameters:
-        system_instruction: System-level instruction for the conversation.
-        messages: List of conversation messages between user and assistant.
-    """
-
-    system_instruction: str = None
-    messages: list[AWSNovaSonicConversationHistoryMessage] = field(default_factory=list)
-
-
-class AWSNovaSonicLLMContext(OpenAILLMContext):
-    """Specialized LLM context for AWS Nova Sonic service.
-
-    Extends OpenAI context with Nova Sonic-specific message handling,
-    conversation history management, and text buffering capabilities.
-    """
-
-    def __init__(self, messages=None, tools=None, **kwargs):
-        """Initialize AWS Nova Sonic LLM context.
-
-        Args:
-            messages: Initial messages for the context.
-            tools: Available tools for the context.
-            **kwargs: Additional arguments passed to parent class.
-        """
-        super().__init__(messages=messages, tools=tools, **kwargs)
-        self.__setup_local()
-
-    def __setup_local(self, system_instruction: str = ""):
-        self._assistant_text = ""
-        self._user_text = ""
-        self._system_instruction = system_instruction
-
-    @staticmethod
-    def upgrade_to_nova_sonic(
-        obj: OpenAILLMContext, system_instruction: str
-    ) -> "AWSNovaSonicLLMContext":
-        """Upgrade an OpenAI context to AWS Nova Sonic context.
-
-        Args:
-            obj: The OpenAI context to upgrade.
-            system_instruction: System instruction for the context.
-
-        Returns:
-            The upgraded AWS Nova Sonic context.
-        """
-        if isinstance(obj, OpenAILLMContext) and not isinstance(obj, AWSNovaSonicLLMContext):
-            obj.__class__ = AWSNovaSonicLLMContext
-            obj.__setup_local(system_instruction)
-        return obj
-
-    # NOTE: this method has the side-effect of updating _system_instruction from messages
-    def get_messages_for_initializing_history(self) -> AWSNovaSonicConversationHistory:
-        """Get conversation history for initializing AWS Nova Sonic session.
-
-        Processes stored messages and extracts system instruction and conversation
-        history in the format expected by AWS Nova Sonic.
-
-        Returns:
-            Formatted conversation history with system instruction and messages.
-        """
-        history = AWSNovaSonicConversationHistory(system_instruction=self._system_instruction)
-
-        # Bail if there are no messages
-        if not self.messages:
-            return history
-
-        messages = copy.deepcopy(self.messages)
-
-        # If we have a "system" message as our first message, let's pull that out into "instruction"
-        if messages[0].get("role") == "system":
-            system = messages.pop(0)
-            content = system.get("content")
-            if isinstance(content, str):
-                history.system_instruction = content
-            elif isinstance(content, list):
-                history.system_instruction = content[0].get("text")
-            if history.system_instruction:
-                self._system_instruction = history.system_instruction
-
-        # Process remaining messages to fill out conversation history.
-        # Nova Sonic supports "user" and "assistant" messages in history.
-        for message in messages:
-            history_message = self.from_standard_message(message)
-            if history_message:
-                history.messages.append(history_message)
-
-        return history
-
-    def get_messages_for_persistent_storage(self):
-        """Get messages formatted for persistent storage.
-
-        Returns:
-            List of messages including system instruction if present.
-        """
-        messages = super().get_messages_for_persistent_storage()
-        # If we have a system instruction and messages doesn't already contain it, add it
-        if self._system_instruction and not (messages and messages[0].get("role") == "system"):
-            messages.insert(0, {"role": "system", "content": self._system_instruction})
-        return messages
-
-    def from_standard_message(self, message) -> AWSNovaSonicConversationHistoryMessage:
-        """Convert standard message format to Nova Sonic format.
-
-        Args:
-            message: Standard message dictionary to convert.
-
-        Returns:
-            Nova Sonic conversation history message, or None if not convertible.
-        """
-        role = message.get("role")
-        if message.get("role") == "user" or message.get("role") == "assistant":
-            content = message.get("content")
-            if isinstance(message.get("content"), list):
-                content = ""
-                for c in message.get("content"):
-                    if c.get("type") == "text":
-                        content += " " + c.get("text")
-                    else:
-                        logger.error(
-                            f"Unhandled content type in context message: {c.get('type')} - {message}"
-                        )
-            # There won't be content if this is an assistant tool call entry.
-            # We're ignoring those since they can't be loaded into AWS Nova Sonic conversation
-            # history
-            if content:
-                return AWSNovaSonicConversationHistoryMessage(role=Role[role.upper()], text=content)
-        # NOTE: we're ignoring messages with role "tool" since they can't be loaded into AWS Nova
-        # Sonic conversation history
-
-    def buffer_user_text(self, text):
-        """Buffer user text for later flushing to context.
-
-        Args:
-            text: User text to buffer.
-        """
-        self._user_text += f" {text}" if self._user_text else text
-        # logger.debug(f"User text buffered: {self._user_text}")
-
-    def flush_aggregated_user_text(self) -> str:
-        """Flush buffered user text to context as a complete message.
-
-        Returns:
-            The flushed user text, or empty string if no text was buffered.
-        """
-        if not self._user_text:
-            return ""
-        user_text = self._user_text
-        message = {
-            "role": "user",
-            "content": [{"type": "text", "text": user_text}],
-        }
-        self._user_text = ""
-        self.add_message(message)
-        # logger.debug(f"Context updated (user): {self.get_messages_for_logging()}")
-        return user_text
-
-    def buffer_assistant_text(self, text):
-        """Buffer assistant text for later flushing to context.
-
-        Args:
-            text: Assistant text to buffer.
-        """
-        self._assistant_text += text
-        # logger.debug(f"Assistant text buffered: {self._assistant_text}")
-
-    def flush_aggregated_assistant_text(self):
-        """Flush buffered assistant text to context as a complete message."""
-        if not self._assistant_text:
-            return
-        message = {
-            "role": "assistant",
-            "content": [{"type": "text", "text": self._assistant_text}],
-        }
-        self._assistant_text = ""
-        self.add_message(message)
-        # logger.debug(f"Context updated (assistant): {self.get_messages_for_logging()}")
-
-
-@dataclass
-class AWSNovaSonicMessagesUpdateFrame(DataFrame):
-    """Frame containing updated AWS Nova Sonic context.
-
-    Parameters:
-        context: The updated AWS Nova Sonic LLM context.
-    """
-
-    context: AWSNovaSonicLLMContext
-
-
-class AWSNovaSonicUserContextAggregator(OpenAIUserContextAggregator):
-    """Context aggregator for user messages in AWS Nova Sonic conversations.
-
-    Extends the OpenAI user context aggregator to emit Nova Sonic-specific
-    context update frames.
-    """
-
-    async def process_frame(
-        self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM
-    ):
-        """Process frames and emit Nova Sonic-specific context updates.
-
-        Args:
-            frame: The frame to process.
-            direction: The direction the frame is traveling.
-        """
-        await super().process_frame(frame, direction)
-
-        # Parent does not push LLMMessagesUpdateFrame
-        if isinstance(frame, LLMMessagesUpdateFrame):
-            await self.push_frame(AWSNovaSonicMessagesUpdateFrame(context=self._context))
-
-
-class AWSNovaSonicAssistantContextAggregator(OpenAIAssistantContextAggregator):
-    """Context aggregator for assistant messages in AWS Nova Sonic conversations.
-
-    Provides specialized handling for assistant responses and function calls
-    in AWS Nova Sonic context, with custom frame processing logic.
-    """
-
-    async def process_frame(self, frame: Frame, direction: FrameDirection):
-        """Process frames with Nova Sonic-specific logic.
-
-        Args:
-            frame: The frame to process.
-            direction: The direction the frame is traveling.
-        """
-        # HACK: For now, disable the context aggregator by making it just pass through all frames
-        # that the parent handles (except the function call stuff, which we still need).
-        # For an explanation of this hack, see
-        # AWSNovaSonicLLMService._report_assistant_response_text_added.
-        if isinstance(
-            frame,
-            (
-                InterruptionFrame,
-                LLMFullResponseStartFrame,
-                LLMFullResponseEndFrame,
-                TextFrame,
-                LLMMessagesAppendFrame,
-                LLMMessagesUpdateFrame,
-                LLMSetToolsFrame,
-                LLMSetToolChoiceFrame,
-                UserImageRawFrame,
-                BotStoppedSpeakingFrame,
-            ),
-        ):
-            await self.push_frame(frame, direction)
-        else:
-            await super().process_frame(frame, direction)
-
-    async def handle_function_call_result(self, frame: FunctionCallResultFrame):
-        """Handle function call results for AWS Nova Sonic.
-
-        Args:
-            frame: The function call result frame to handle.
-        """
-        await super().handle_function_call_result(frame)
-
-        # The standard function callback code path pushes the FunctionCallResultFrame from the LLM
-        # itself, so we didn't have a chance to add the result to the AWS Nova Sonic server-side
-        # context. Let's push a special frame to do that.
-        await self.push_frame(
-            AWSNovaSonicFunctionCallResultFrame(result_frame=frame), FrameDirection.UPSTREAM
-        )
-
-
-@dataclass
-class AWSNovaSonicContextAggregatorPair:
-    """Pair of user and assistant context aggregators for AWS Nova Sonic.
-
-    Parameters:
-        _user: The user context aggregator.
-        _assistant: The assistant context aggregator.
-    """
-
-    _user: AWSNovaSonicUserContextAggregator
-    _assistant: AWSNovaSonicAssistantContextAggregator
-
-    def user(self) -> AWSNovaSonicUserContextAggregator:
-        """Get the user context aggregator.
-
-        Returns:
-            The user context aggregator instance.
-        """
-        return self._user
-
-    def assistant(self) -> AWSNovaSonicAssistantContextAggregator:
-        """Get the assistant context aggregator.
-
-        Returns:
-            The assistant context aggregator instance.
-        """
-        return self._assistant
--- a/src/pipecat/services/aws/nova_sonic/frames.py
+++ b/src/pipecat/services/aws/nova_sonic/frames.py
@@ -1,25 +0,0 @@
-#
-# Copyright (c) 2025, Daily
-#
-# SPDX-License-Identifier: BSD 2-Clause License
-#
-
-"""Custom frames for AWS Nova Sonic LLM service."""
-
-from dataclasses import dataclass
-
-from pipecat.frames.frames import DataFrame, FunctionCallResultFrame
-
-
-@dataclass
-class AWSNovaSonicFunctionCallResultFrame(DataFrame):
-    """Frame containing function call result for AWS Nova Sonic processing.
-
-    This frame wraps a standard function call result frame to enable
-    AWS Nova Sonic-specific handling and context updates.
-
-    Parameters:
-        result_frame: The underlying function call result frame.
-    """
-
-    result_frame: FunctionCallResultFrame
--- a/src/pipecat/services/aws/nova_sonic/llm.py
+++ b/src/pipecat/services/aws/nova_sonic/llm.py
--- a/src/pipecat/services/aws_nova_sonic/init.py
+++ b/src/pipecat/services/aws_nova_sonic/init.py
@@ -1,19 +1 @@
-#
-# Copyright (c) 2025, Daily
-#
-# SPDX-License-Identifier: BSD 2-Clause License
-#
-
-import warnings
-
-from pipecat.services.aws.nova_sonic.llm import AWSNovaSonicLLMService, Params
-
-with warnings.catch_warnings():
-    warnings.simplefilter("always")
-    warnings.warn(
-        "Types in pipecat.services.aws_nova_sonic are deprecated. "
-        "Please use the equivalent types from "
-        "pipecat.services.aws.nova_sonic.llm instead.",
-        DeprecationWarning,
-        stacklevel=2,
-    )
+from .aws import AWSNovaSonicLLMService, Params
--- a/src/pipecat/services/aws_nova_sonic/aws.py
+++ b/src/pipecat/services/aws_nova_sonic/aws.py
--- a/src/pipecat/services/aws_nova_sonic/context.py
+++ b/src/pipecat/services/aws_nova_sonic/context.py
@@ -10,16 +10,358 @@ This module provides specialized context aggregators and message handling for AW
 including conversation history management and role-specific message processing.
 """

-import warnings
+import copy
+from dataclasses import dataclass, field
+from enum import Enum

-from pipecat.services.aws.nova_sonic.context import *
+from loguru import logger

-with warnings.catch_warnings():
-    warnings.simplefilter("always")
-    warnings.warn(
-        "Types in pipecat.services.aws_nova_sonic.context are deprecated. "
-        "Please use the equivalent types from "
-        "pipecat.services.aws.nova_sonic.context instead.",
-        DeprecationWarning,
-        stacklevel=2,
-    )
+from pipecat.frames.frames import (
+    BotStoppedSpeakingFrame,
+    DataFrame,
+    Frame,
+    FunctionCallResultFrame,
+    InterruptionFrame,
+    LLMFullResponseEndFrame,
+    LLMFullResponseStartFrame,
+    LLMMessagesAppendFrame,
+    LLMMessagesUpdateFrame,
+    LLMSetToolChoiceFrame,
+    LLMSetToolsFrame,
+    TextFrame,
+    UserImageRawFrame,
+)
+from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.processors.frame_processor import FrameDirection
+from pipecat.services.aws_nova_sonic.frames import AWSNovaSonicFunctionCallResultFrame
+from pipecat.services.openai.llm import (
+    OpenAIAssistantContextAggregator,
+    OpenAIUserContextAggregator,
+)
+
+
+class Role(Enum):
+    """Roles supported in AWS Nova Sonic conversations.
+
+    Parameters:
+        SYSTEM: System-level messages (not used in conversation history).
+        USER: Messages sent by the user.
+        ASSISTANT: Messages sent by the assistant.
+        TOOL: Messages sent by tools (not used in conversation history).
+    """
+
+    SYSTEM = "SYSTEM"
+    USER = "USER"
+    ASSISTANT = "ASSISTANT"
+    TOOL = "TOOL"
+
+
+@dataclass
+class AWSNovaSonicConversationHistoryMessage:
+    """A single message in AWS Nova Sonic conversation history.
+
+    Parameters:
+        role: The role of the message sender (USER or ASSISTANT only).
+        text: The text content of the message.
+    """
+
+    role: Role  # only USER and ASSISTANT
+    text: str
+
+
+@dataclass
+class AWSNovaSonicConversationHistory:
+    """Complete conversation history for AWS Nova Sonic initialization.
+
+    Parameters:
+        system_instruction: System-level instruction for the conversation.
+        messages: List of conversation messages between user and assistant.
+    """
+
+    system_instruction: str = None
+    messages: list[AWSNovaSonicConversationHistoryMessage] = field(default_factory=list)
+
+
+class AWSNovaSonicLLMContext(OpenAILLMContext):
+    """Specialized LLM context for AWS Nova Sonic service.
+
+    Extends OpenAI context with Nova Sonic-specific message handling,
+    conversation history management, and text buffering capabilities.
+    """
+
+    def __init__(self, messages=None, tools=None, **kwargs):
+        """Initialize AWS Nova Sonic LLM context.
+
+        Args:
+            messages: Initial messages for the context.
+            tools: Available tools for the context.
+            **kwargs: Additional arguments passed to parent class.
+        """
+        super().__init__(messages=messages, tools=tools, **kwargs)
+        self.__setup_local()
+
+    def __setup_local(self, system_instruction: str = ""):
+        self._assistant_text = ""
+        self._user_text = ""
+        self._system_instruction = system_instruction
+
+    @staticmethod
+    def upgrade_to_nova_sonic(
+        obj: OpenAILLMContext, system_instruction: str
+    ) -> "AWSNovaSonicLLMContext":
+        """Upgrade an OpenAI context to AWS Nova Sonic context.
+
+        Args:
+            obj: The OpenAI context to upgrade.
+            system_instruction: System instruction for the context.
+
+        Returns:
+            The upgraded AWS Nova Sonic context.
+        """
+        if isinstance(obj, OpenAILLMContext) and not isinstance(obj, AWSNovaSonicLLMContext):
+            obj.__class__ = AWSNovaSonicLLMContext
+            obj.__setup_local(system_instruction)
+        return obj
+
+    # NOTE: this method has the side-effect of updating _system_instruction from messages
+    def get_messages_for_initializing_history(self) -> AWSNovaSonicConversationHistory:
+        """Get conversation history for initializing AWS Nova Sonic session.
+
+        Processes stored messages and extracts system instruction and conversation
+        history in the format expected by AWS Nova Sonic.
+
+        Returns:
+            Formatted conversation history with system instruction and messages.
+        """
+        history = AWSNovaSonicConversationHistory(system_instruction=self._system_instruction)
+
+        # Bail if there are no messages
+        if not self.messages:
+            return history
+
+        messages = copy.deepcopy(self.messages)
+
+        # If we have a "system" message as our first message, let's pull that out into "instruction"
+        if messages[0].get("role") == "system":
+            system = messages.pop(0)
+            content = system.get("content")
+            if isinstance(content, str):
+                history.system_instruction = content
+            elif isinstance(content, list):
+                history.system_instruction = content[0].get("text")
+            if history.system_instruction:
+                self._system_instruction = history.system_instruction
+
+        # Process remaining messages to fill out conversation history.
+        # Nova Sonic supports "user" and "assistant" messages in history.
+        for message in messages:
+            history_message = self.from_standard_message(message)
+            if history_message:
+                history.messages.append(history_message)
+
+        return history
+
+    def get_messages_for_persistent_storage(self):
+        """Get messages formatted for persistent storage.
+
+        Returns:
+            List of messages including system instruction if present.
+        """
+        messages = super().get_messages_for_persistent_storage()
+        # If we have a system instruction and messages doesn't already contain it, add it
+        if self._system_instruction and not (messages and messages[0].get("role") == "system"):
+            messages.insert(0, {"role": "system", "content": self._system_instruction})
+        return messages
+
+    def from_standard_message(self, message) -> AWSNovaSonicConversationHistoryMessage:
+        """Convert standard message format to Nova Sonic format.
+
+        Args:
+            message: Standard message dictionary to convert.
+
+        Returns:
+            Nova Sonic conversation history message, or None if not convertible.
+        """
+        role = message.get("role")
+        if message.get("role") == "user" or message.get("role") == "assistant":
+            content = message.get("content")
+            if isinstance(message.get("content"), list):
+                content = ""
+                for c in message.get("content"):
+                    if c.get("type") == "text":
+                        content += " " + c.get("text")
+                    else:
+                        logger.error(
+                            f"Unhandled content type in context message: {c.get('type')} - {message}"
+                        )
+            # There won't be content if this is an assistant tool call entry.
+            # We're ignoring those since they can't be loaded into AWS Nova Sonic conversation
+            # history
+            if content:
+                return AWSNovaSonicConversationHistoryMessage(role=Role[role.upper()], text=content)
+        # NOTE: we're ignoring messages with role "tool" since they can't be loaded into AWS Nova
+        # Sonic conversation history
+
+    def buffer_user_text(self, text):
+        """Buffer user text for later flushing to context.
+
+        Args:
+            text: User text to buffer.
+        """
+        self._user_text += f" {text}" if self._user_text else text
+        # logger.debug(f"User text buffered: {self._user_text}")
+
+    def flush_aggregated_user_text(self) -> str:
+        """Flush buffered user text to context as a complete message.
+
+        Returns:
+            The flushed user text, or empty string if no text was buffered.
+        """
+        if not self._user_text:
+            return ""
+        user_text = self._user_text
+        message = {
+            "role": "user",
+            "content": [{"type": "text", "text": user_text}],
+        }
+        self._user_text = ""
+        self.add_message(message)
+        # logger.debug(f"Context updated (user): {self.get_messages_for_logging()}")
+        return user_text
+
+    def buffer_assistant_text(self, text):
+        """Buffer assistant text for later flushing to context.
+
+        Args:
+            text: Assistant text to buffer.
+        """
+        self._assistant_text += text
+        # logger.debug(f"Assistant text buffered: {self._assistant_text}")
+
+    def flush_aggregated_assistant_text(self):
+        """Flush buffered assistant text to context as a complete message."""
+        if not self._assistant_text:
+            return
+        message = {
+            "role": "assistant",
+            "content": [{"type": "text", "text": self._assistant_text}],
+        }
+        self._assistant_text = ""
+        self.add_message(message)
+        # logger.debug(f"Context updated (assistant): {self.get_messages_for_logging()}")
+
+
+@dataclass
+class AWSNovaSonicMessagesUpdateFrame(DataFrame):
+    """Frame containing updated AWS Nova Sonic context.
+
+    Parameters:
+        context: The updated AWS Nova Sonic LLM context.
+    """
+
+    context: AWSNovaSonicLLMContext
+
+
+class AWSNovaSonicUserContextAggregator(OpenAIUserContextAggregator):
+    """Context aggregator for user messages in AWS Nova Sonic conversations.
+
+    Extends the OpenAI user context aggregator to emit Nova Sonic-specific
+    context update frames.
+    """
+
+    async def process_frame(
+        self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM
+    ):
+        """Process frames and emit Nova Sonic-specific context updates.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction the frame is traveling.
+        """
+        await super().process_frame(frame, direction)
+
+        # Parent does not push LLMMessagesUpdateFrame
+        if isinstance(frame, LLMMessagesUpdateFrame):
+            await self.push_frame(AWSNovaSonicMessagesUpdateFrame(context=self._context))
+
+
+class AWSNovaSonicAssistantContextAggregator(OpenAIAssistantContextAggregator):
+    """Context aggregator for assistant messages in AWS Nova Sonic conversations.
+
+    Provides specialized handling for assistant responses and function calls
+    in AWS Nova Sonic context, with custom frame processing logic.
+    """
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process frames with Nova Sonic-specific logic.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction the frame is traveling.
+        """
+        # HACK: For now, disable the context aggregator by making it just pass through all frames
+        # that the parent handles (except the function call stuff, which we still need).
+        # For an explanation of this hack, see
+        # AWSNovaSonicLLMService._report_assistant_response_text_added.
+        if isinstance(
+            frame,
+            (
+                InterruptionFrame,
+                LLMFullResponseStartFrame,
+                LLMFullResponseEndFrame,
+                TextFrame,
+                LLMMessagesAppendFrame,
+                LLMMessagesUpdateFrame,
+                LLMSetToolsFrame,
+                LLMSetToolChoiceFrame,
+                UserImageRawFrame,
+                BotStoppedSpeakingFrame,
+            ),
+        ):
+            await self.push_frame(frame, direction)
+        else:
+            await super().process_frame(frame, direction)
+
+    async def handle_function_call_result(self, frame: FunctionCallResultFrame):
+        """Handle function call results for AWS Nova Sonic.
+
+        Args:
+            frame: The function call result frame to handle.
+        """
+        await super().handle_function_call_result(frame)
+
+        # The standard function callback code path pushes the FunctionCallResultFrame from the LLM
+        # itself, so we didn't have a chance to add the result to the AWS Nova Sonic server-side
+        # context. Let's push a special frame to do that.
+        await self.push_frame(
+            AWSNovaSonicFunctionCallResultFrame(result_frame=frame), FrameDirection.UPSTREAM
+        )
+
+
+@dataclass
+class AWSNovaSonicContextAggregatorPair:
+    """Pair of user and assistant context aggregators for AWS Nova Sonic.
+
+    Parameters:
+        _user: The user context aggregator.
+        _assistant: The assistant context aggregator.
+    """
+
+    _user: AWSNovaSonicUserContextAggregator
+    _assistant: AWSNovaSonicAssistantContextAggregator
+
+    def user(self) -> AWSNovaSonicUserContextAggregator:
+        """Get the user context aggregator.
+
+        Returns:
+            The user context aggregator instance.
+        """
+        return self._user
+
+    def assistant(self) -> AWSNovaSonicAssistantContextAggregator:
+        """Get the assistant context aggregator.
+
+        Returns:
+            The assistant context aggregator instance.
+        """
+        return self._assistant
--- a/src/pipecat/services/aws_nova_sonic/frames.py
+++ b/src/pipecat/services/aws_nova_sonic/frames.py
@@ -6,16 +6,20 @@

 """Custom frames for AWS Nova Sonic LLM service."""

-import warnings
+from dataclasses import dataclass

-from pipecat.services.aws.nova_sonic.frames import *
+from pipecat.frames.frames import DataFrame, FunctionCallResultFrame

-with warnings.catch_warnings():
-    warnings.simplefilter("always")
-    warnings.warn(
-        "Types in pipecat.services.aws_nova_sonic.frames are deprecated. "
-        "Please use the equivalent types from "
-        "pipecat.services.aws.nova_sonic.frames instead.",
-        DeprecationWarning,
-        stacklevel=2,
-    )
+
+@dataclass
+class AWSNovaSonicFunctionCallResultFrame(DataFrame):
+    """Frame containing function call result for AWS Nova Sonic processing.
+
+    This frame wraps a standard function call result frame to enable
+    AWS Nova Sonic-specific handling and context updates.
+
+    Parameters:
+        result_frame: The underlying function call result frame.
+    """
+
+    result_frame: FunctionCallResultFrame
--- a/src/pipecat/services/aws_nova_sonic/ready.wav
+++ b/src/pipecat/services/aws_nova_sonic/ready.wav
--- a/src/pipecat/services/azure/realtime/init.py
+++ b/src/pipecat/services/azure/realtime/init.py
--- a/src/pipecat/services/azure/realtime/llm.py
+++ b/src/pipecat/services/azure/realtime/llm.py
@@ -1,65 +0,0 @@
-#
-# Copyright (c) 2024–2025, Daily
-#
-# SPDX-License-Identifier: BSD 2-Clause License
-#
-
-"""Azure OpenAI Realtime LLM service implementation."""
-
-from loguru import logger
-
-from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService
-
-try:
-    from websockets.asyncio.client import connect as websocket_connect
-except ModuleNotFoundError as e:
-    logger.error(f"Exception: {e}")
-    logger.error("In order to use Azure Realtime, you need to `pip install pipecat-ai[openai]`.")
-    raise Exception(f"Missing module: {e}")
-
-
-class AzureRealtimeLLMService(OpenAIRealtimeLLMService):
-    """Azure OpenAI Realtime LLM service with Azure-specific authentication.
-
-    Extends the OpenAI Realtime service to work with Azure OpenAI endpoints,
-    using Azure's authentication headers and endpoint format. Provides the same
-    real-time audio and text communication capabilities as the base OpenAI service.
-    """
-
-    def __init__(
-        self,
-        *,
-        api_key: str,
-        base_url: str,
-        **kwargs,
-    ):
-        """Initialize Azure Realtime LLM service.
-
-        Args:
-            api_key: The API key for the Azure OpenAI service.
-            base_url: The full Azure WebSocket endpoint URL including api-version and deployment.
-                Example: "wss://my-project.openai.azure.com/openai/realtime?api-version=2024-10-01-preview&deployment=my-realtime-deployment"
-            **kwargs: Additional arguments passed to parent OpenAIRealtimeLLMService.
-        """
-        super().__init__(base_url=base_url, api_key=api_key, **kwargs)
-        self.api_key = api_key
-        self.base_url = base_url
-
-    async def _connect(self):
-        try:
-            if self._websocket:
-                # Here we assume that if we have a websocket, we are connected. We
-                # handle disconnections in the send/recv code paths.
-                return
-
-            logger.info(f"Connecting to {self.base_url}, api key: {self.api_key}")
-            self._websocket = await websocket_connect(
-                uri=self.base_url,
-                additional_headers={
-                    "api-key": self.api_key,
-                },
-            )
-            self._receive_task = self.create_task(self._receive_task_handler())
-        except Exception as e:
-            logger.error(f"{self} initialization error: {e}")
-            self._websocket = None
--- a/src/pipecat/services/gemini_multimodal_live/events.py
+++ b/src/pipecat/services/gemini_multimodal_live/events.py
@@ -30,15 +30,12 @@ except ModuleNotFoundError as e:
 # These aliases are just here for backward compatibility, since we used to
 # define public-facing StartSensitivity and EndSensitivity enums in this
 # module.
-with warnings.catch_warnings():
-    warnings.simplefilter("always")
-    warnings.warn(
-        "Importing StartSensitivity and EndSensitivity from "
-        "pipecat.services.gemini_multimodal_live.events is deprecated. "
-        "Please import them directly from google.genai.types instead.",
-        DeprecationWarning,
-        stacklevel=2,
-    )
-
+warnings.warn(
+    "Importing StartSensitivity and EndSensitivity from "
+    "pipecat.services.gemini_multimodal_live.events is deprecated. "
+    "Please import them directly from google.genai.types instead.",
+    DeprecationWarning,
+    stacklevel=2,
+)
 StartSensitivity = _StartSensitivity
 EndSensitivity = _EndSensitivity
--- a/src/pipecat/services/gemini_multimodal_live/file_api.py
+++ b/src/pipecat/services/gemini_multimodal_live/file_api.py
@@ -9,31 +9,181 @@
 This module provides a client for Google's Gemini File API, enabling file
 uploads, metadata retrieval, listing, and deletion. Files uploaded through
 this API can be referenced in Gemini generative model calls.
-
-.. deprecated:: 0.0.90
-    Importing GeminiFileAPI from this module is deprecated.
-    Import it from pipecat.services.google.gemini_live.file_api instead.
 """

-import warnings
+import mimetypes
+from typing import Any, Dict, Optional

+import aiohttp
 from loguru import logger

-try:
-    from pipecat.services.google.gemini_live.file_api import GeminiFileAPI as _GeminiFileAPI
-except ModuleNotFoundError as e:
-    logger.error(f"Exception: {e}")
-    logger.error("In order to use Google AI, you need to `pip install pipecat-ai[google]`.")
-    raise Exception(f"Missing module: {e}")

-# These aliases are just here for backward compatibility, since we used to
-# define public-facing StartSensitivity and EndSensitivity enums in this
-# module.
-warnings.warn(
-    "Importing GeminiFileAPI from "
-    "pipecat.services.gemini_multimodal_live.file_api is deprecated. "
-    "Please import it from pipecat.services.google.gemini_live.file_api instead.",
-    DeprecationWarning,
-    stacklevel=2,
-)
-GeminiFileAPI = _GeminiFileAPI
+class GeminiFileAPI:
+    """Client for the Gemini File API.
+
+    This class provides methods for uploading, fetching, listing, and deleting files
+    through Google's Gemini File API.
+
+    Files uploaded through this API remain available for 48 hours and can be referenced
+    in calls to the Gemini generative models. Maximum file size is 2GB, with total
+    project storage limited to 20GB.
+    """
+
+    def __init__(
+        self, api_key: str, base_url: str = "https://generativelanguage.googleapis.com/v1beta/files"
+    ):
+        """Initialize the Gemini File API client.
+
+        Args:
+            api_key: Google AI API key
+            base_url: Base URL for the Gemini File API (default is the v1beta endpoint)
+        """
+        self._api_key = api_key
+        self._base_url = base_url
+        # Upload URL uses the /upload/ path
+        self.upload_base_url = "https://generativelanguage.googleapis.com/upload/v1beta/files"
+
+    async def upload_file(
+        self, file_path: str, display_name: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """Upload a file to the Gemini File API using the correct resumable upload protocol.
+
+        Args:
+            file_path: Path to the file to upload
+            display_name: Optional display name for the file
+
+        Returns:
+            File metadata including uri, name, and display_name
+        """
+        logger.info(f"Uploading file: {file_path}")
+
+        async with aiohttp.ClientSession() as session:
+            # Determine the file's MIME type
+            mime_type, _ = mimetypes.guess_type(file_path)
+            if not mime_type:
+                mime_type = "application/octet-stream"
+
+            # Read the file
+            with open(file_path, "rb") as f:
+                file_data = f.read()
+
+            # Create the metadata payload
+            metadata = {}
+            if display_name:
+                metadata = {"file": {"display_name": display_name}}
+
+            # Step 1: Initial resumable request to get upload URL
+            headers = {
+                "X-Goog-Upload-Protocol": "resumable",
+                "X-Goog-Upload-Command": "start",
+                "X-Goog-Upload-Header-Content-Length": str(len(file_data)),
+                "X-Goog-Upload-Header-Content-Type": mime_type,
+                "Content-Type": "application/json",
+            }
+
+            logger.debug(f"Step 1: Getting upload URL from {self.upload_base_url}")
+            async with session.post(
+                f"{self.upload_base_url}?key={self._api_key}", headers=headers, json=metadata
+            ) as response:
+                if response.status != 200:
+                    error_text = await response.text()
+                    logger.error(f"Error initiating file upload: {error_text}")
+                    raise Exception(f"Failed to initiate upload: {response.status} - {error_text}")
+
+                # Get the upload URL from the response header
+                upload_url = response.headers.get("X-Goog-Upload-URL")
+                if not upload_url:
+                    logger.error(f"Response headers: {dict(response.headers)}")
+                    raise Exception("No upload URL in response headers")
+
+                logger.debug(f"Got upload URL: {upload_url}")
+
+            # Step 2: Upload the actual file data
+            upload_headers = {
+                "Content-Length": str(len(file_data)),
+                "X-Goog-Upload-Offset": "0",
+                "X-Goog-Upload-Command": "upload, finalize",
+            }
+
+            logger.debug(f"Step 2: Uploading file data to {upload_url}")
+            async with session.post(upload_url, headers=upload_headers, data=file_data) as response:
+                if response.status != 200:
+                    error_text = await response.text()
+                    logger.error(f"Error uploading file data: {error_text}")
+                    raise Exception(f"Failed to upload file: {response.status} - {error_text}")
+
+                file_info = await response.json()
+                logger.info(f"File uploaded successfully: {file_info.get('file', {}).get('name')}")
+                return file_info
+
+    async def get_file(self, name: str) -> Dict[str, Any]:
+        """Get metadata for a file.
+
+        Args:
+            name: File name (or full path)
+
+        Returns:
+            File metadata
+        """
+        # Extract just the name part if a full path is provided
+        if "/" in name:
+            name = name.split("/")[-1]
+
+        async with aiohttp.ClientSession() as session:
+            async with session.get(f"{self._base_url}/{name}?key={self._api_key}") as response:
+                if response.status != 200:
+                    error_text = await response.text()
+                    logger.error(f"Error getting file metadata: {error_text}")
+                    raise Exception(f"Failed to get file metadata: {response.status}")
+
+                file_info = await response.json()
+                return file_info
+
+    async def list_files(
+        self, page_size: int = 10, page_token: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """List uploaded files.
+
+        Args:
+            page_size: Number of files to return per page
+            page_token: Token for pagination
+
+        Returns:
+            List of files and next page token if available
+        """
+        params = {"key": self._api_key, "pageSize": page_size}
+
+        if page_token:
+            params["pageToken"] = page_token
+
+        async with aiohttp.ClientSession() as session:
+            async with session.get(self._base_url, params=params) as response:
+                if response.status != 200:
+                    error_text = await response.text()
+                    logger.error(f"Error listing files: {error_text}")
+                    raise Exception(f"Failed to list files: {response.status}")
+
+                result = await response.json()
+                return result
+
+    async def delete_file(self, name: str) -> bool:
+        """Delete a file.
+
+        Args:
+            name: File name (or full path)
+
+        Returns:
+            True if deleted successfully
+        """
+        # Extract just the name part if a full path is provided
+        if "/" in name:
+            name = name.split("/")[-1]
+
+        async with aiohttp.ClientSession() as session:
+            async with session.delete(f"{self._base_url}/{name}?key={self._api_key}") as response:
+                if response.status != 200:
+                    error_text = await response.text()
+                    logger.error(f"Error deleting file: {error_text}")
+                    raise Exception(f"Failed to delete file: {response.status}")
+
+                return True
--- a/src/pipecat/services/gemini_multimodal_live/gemini.py
+++ b/src/pipecat/services/gemini_multimodal_live/gemini.py
--- a/src/pipecat/services/google/init.py
+++ b/src/pipecat/services/google/init.py
@@ -9,7 +9,6 @@ import sys
 from pipecat.services import DeprecatedModuleProxy

 from .frames import *
-from .gemini_live import *
 from .image import *
 from .llm import *
 from .llm_openai import *
--- a/src/pipecat/services/google/gemini_live/init.py
+++ b/src/pipecat/services/google/gemini_live/init.py
@@ -1,3 +0,0 @@
-from .file_api import GeminiFileAPI
-from .llm import GeminiLiveLLMService
-from .llm_vertex import GeminiLiveVertexLLMService
--- a/src/pipecat/services/google/gemini_live/file_api.py
+++ b/src/pipecat/services/google/gemini_live/file_api.py
@@ -1,189 +0,0 @@
-#
-# Copyright (c) 2024–2025, Daily
-#
-# SPDX-License-Identifier: BSD 2-Clause License
-#
-
-"""Gemini File API client for uploading and managing files.
-
-This module provides a client for Google's Gemini File API, enabling file
-uploads, metadata retrieval, listing, and deletion. Files uploaded through
-this API can be referenced in Gemini generative model calls.
-"""
-
-import mimetypes
-from typing import Any, Dict, Optional
-
-import aiohttp
-from loguru import logger
-
-
-class GeminiFileAPI:
-    """Client for the Gemini File API.
-
-    This class provides methods for uploading, fetching, listing, and deleting files
-    through Google's Gemini File API.
-
-    Files uploaded through this API remain available for 48 hours and can be referenced
-    in calls to the Gemini generative models. Maximum file size is 2GB, with total
-    project storage limited to 20GB.
-    """
-
-    def __init__(
-        self, api_key: str, base_url: str = "https://generativelanguage.googleapis.com/v1beta/files"
-    ):
-        """Initialize the Gemini File API client.
-
-        Args:
-            api_key: Google AI API key
-            base_url: Base URL for the Gemini File API (default is the v1beta endpoint)
-        """
-        self._api_key = api_key
-        self._base_url = base_url
-        # Upload URL uses the /upload/ path
-        self.upload_base_url = "https://generativelanguage.googleapis.com/upload/v1beta/files"
-
-    async def upload_file(
-        self, file_path: str, display_name: Optional[str] = None
-    ) -> Dict[str, Any]:
-        """Upload a file to the Gemini File API using the correct resumable upload protocol.
-
-        Args:
-            file_path: Path to the file to upload
-            display_name: Optional display name for the file
-
-        Returns:
-            File metadata including uri, name, and display_name
-        """
-        logger.info(f"Uploading file: {file_path}")
-
-        async with aiohttp.ClientSession() as session:
-            # Determine the file's MIME type
-            mime_type, _ = mimetypes.guess_type(file_path)
-            if not mime_type:
-                mime_type = "application/octet-stream"
-
-            # Read the file
-            with open(file_path, "rb") as f:
-                file_data = f.read()
-
-            # Create the metadata payload
-            metadata = {}
-            if display_name:
-                metadata = {"file": {"display_name": display_name}}
-
-            # Step 1: Initial resumable request to get upload URL
-            headers = {
-                "X-Goog-Upload-Protocol": "resumable",
-                "X-Goog-Upload-Command": "start",
-                "X-Goog-Upload-Header-Content-Length": str(len(file_data)),
-                "X-Goog-Upload-Header-Content-Type": mime_type,
-                "Content-Type": "application/json",
-            }
-
-            logger.debug(f"Step 1: Getting upload URL from {self.upload_base_url}")
-            async with session.post(
-                f"{self.upload_base_url}?key={self._api_key}", headers=headers, json=metadata
-            ) as response:
-                if response.status != 200:
-                    error_text = await response.text()
-                    logger.error(f"Error initiating file upload: {error_text}")
-                    raise Exception(f"Failed to initiate upload: {response.status} - {error_text}")
-
-                # Get the upload URL from the response header
-                upload_url = response.headers.get("X-Goog-Upload-URL")
-                if not upload_url:
-                    logger.error(f"Response headers: {dict(response.headers)}")
-                    raise Exception("No upload URL in response headers")
-
-                logger.debug(f"Got upload URL: {upload_url}")
-
-            # Step 2: Upload the actual file data
-            upload_headers = {
-                "Content-Length": str(len(file_data)),
-                "X-Goog-Upload-Offset": "0",
-                "X-Goog-Upload-Command": "upload, finalize",
-            }
-
-            logger.debug(f"Step 2: Uploading file data to {upload_url}")
-            async with session.post(upload_url, headers=upload_headers, data=file_data) as response:
-                if response.status != 200:
-                    error_text = await response.text()
-                    logger.error(f"Error uploading file data: {error_text}")
-                    raise Exception(f"Failed to upload file: {response.status} - {error_text}")
-
-                file_info = await response.json()
-                logger.info(f"File uploaded successfully: {file_info.get('file', {}).get('name')}")
-                return file_info
-
-    async def get_file(self, name: str) -> Dict[str, Any]:
-        """Get metadata for a file.
-
-        Args:
-            name: File name (or full path)
-
-        Returns:
-            File metadata
-        """
-        # Extract just the name part if a full path is provided
-        if "/" in name:
-            name = name.split("/")[-1]
-
-        async with aiohttp.ClientSession() as session:
-            async with session.get(f"{self._base_url}/{name}?key={self._api_key}") as response:
-                if response.status != 200:
-                    error_text = await response.text()
-                    logger.error(f"Error getting file metadata: {error_text}")
-                    raise Exception(f"Failed to get file metadata: {response.status}")
-
-                file_info = await response.json()
-                return file_info
-
-    async def list_files(
-        self, page_size: int = 10, page_token: Optional[str] = None
-    ) -> Dict[str, Any]:
-        """List uploaded files.
-
-        Args:
-            page_size: Number of files to return per page
-            page_token: Token for pagination
-
-        Returns:
-            List of files and next page token if available
-        """
-        params = {"key": self._api_key, "pageSize": page_size}
-
-        if page_token:
-            params["pageToken"] = page_token
-
-        async with aiohttp.ClientSession() as session:
-            async with session.get(self._base_url, params=params) as response:
-                if response.status != 200:
-                    error_text = await response.text()
-                    logger.error(f"Error listing files: {error_text}")
-                    raise Exception(f"Failed to list files: {response.status}")
-
-                result = await response.json()
-                return result
-
-    async def delete_file(self, name: str) -> bool:
-        """Delete a file.
-
-        Args:
-            name: File name (or full path)
-
-        Returns:
-            True if deleted successfully
-        """
-        # Extract just the name part if a full path is provided
-        if "/" in name:
-            name = name.split("/")[-1]
-
-        async with aiohttp.ClientSession() as session:
-            async with session.delete(f"{self._base_url}/{name}?key={self._api_key}") as response:
-                if response.status != 200:
-                    error_text = await response.text()
-                    logger.error(f"Error deleting file: {error_text}")
-                    raise Exception(f"Failed to delete file: {response.status}")
-
-                return True
--- a/src/pipecat/services/google/gemini_live/llm.py
+++ b/src/pipecat/services/google/gemini_live/llm.py
--- a/src/pipecat/services/google/gemini_live/llm_vertex.py
+++ b/src/pipecat/services/google/gemini_live/llm_vertex.py
@@ -1,184 +0,0 @@
-#
-# Copyright (c) 2024–2025, Daily
-#
-# SPDX-License-Identifier: BSD 2-Clause License
-#
-
-"""Service for accessing Gemini Live via Google Vertex AI.
-
-This module provides integration with Google's Gemini Live model via
-Vertex AI, supporting both text and audio modalities with voice transcription,
-streaming responses, and tool usage.
-"""
-
-import json
-from typing import List, Optional, Union
-
-from loguru import logger
-
-from pipecat.adapters.schemas.tools_schema import ToolsSchema
-from pipecat.services.google.gemini_live.llm import (
-    GeminiLiveLLMService,
-    HttpOptions,
-    InputParams,
-)
-
-try:
-    from google.auth import default
-    from google.auth.exceptions import GoogleAuthError
-    from google.auth.transport.requests import Request
-    from google.genai import Client
-    from google.oauth2 import service_account
-
-except ModuleNotFoundError as e:
-    logger.error(f"Exception: {e}")
-    logger.error("In order to use Google Vertex AI, you need to `pip install pipecat-ai[google]`.")
-    raise Exception(f"Missing module: {e}")
-
-
-class GeminiLiveVertexLLMService(GeminiLiveLLMService):
-    """Provides access to Google's Gemini Live model via Vertex AI.
-
-    This service enables real-time conversations with Gemini, supporting both
-    text and audio modalities. It handles voice transcription, streaming audio
-    responses, and tool usage.
-    """
-
-    def __init__(
-        self,
-        *,
-        credentials: Optional[str] = None,
-        credentials_path: Optional[str] = None,
-        location: str,
-        project_id: str,
-        model="google/gemini-2.0-flash-live-preview-04-09",
-        voice_id: str = "Charon",
-        start_audio_paused: bool = False,
-        start_video_paused: bool = False,
-        system_instruction: Optional[str] = None,
-        tools: Optional[Union[List[dict], ToolsSchema]] = None,
-        params: Optional[InputParams] = None,
-        inference_on_context_initialization: bool = True,
-        file_api_base_url: str = "https://generativelanguage.googleapis.com/v1beta/files",
-        http_options: Optional[HttpOptions] = None,
-        **kwargs,
-    ):
-        """Initialize the service for accessing Gemini Live via Google Vertex AI.
-
-        Args:
-            credentials: JSON string of service account credentials.
-            credentials_path: Path to the service account JSON file.
-            location: GCP region for Vertex AI endpoint (e.g., "us-east4").
-            project_id: Google Cloud project ID.
-            model: Model identifier to use. Defaults to "models/gemini-2.0-flash-live-preview-04-09".
-            voice_id: TTS voice identifier. Defaults to "Charon".
-            start_audio_paused: Whether to start with audio input paused. Defaults to False.
-            start_video_paused: Whether to start with video input paused. Defaults to False.
-            system_instruction: System prompt for the model. Defaults to None.
-            tools: Tools/functions available to the model. Defaults to None.
-            params: Configuration parameters for the model along with Vertex AI
-                location and project ID.
-            inference_on_context_initialization: Whether to generate a response when context
-                is first set. Defaults to True.
-            file_api_base_url: Base URL for the Gemini File API. Defaults to the official endpoint.
-            http_options: HTTP options for the client.
-            **kwargs: Additional arguments passed to parent GeminiLiveLLMService.
-        """
-        # Check if user incorrectly passed api_key, which is used by parent
-        # class but not here.
-        if "api_key" in kwargs:
-            logger.error(
-                "GeminiLiveVertexLLMService does not accept 'api_key' parameter. "
-                "Use 'credentials' or 'credentials_path' instead for Vertex AI authentication."
-            )
-            raise ValueError(
-                "Invalid parameter 'api_key'. Use 'credentials' or 'credentials_path' for Vertex AI authentication."
-            )
-
-        # These need to be set before calling super().__init__() because
-        # super().__init__() invokes create_client(), which needs these.
-        self._credentials = self._get_credentials(credentials, credentials_path)
-        self._project_id = project_id
-        self._location = location
-
-        # Call parent constructor with the obtained API key
-        super().__init__(
-            # api_key is required by parent class, but actually not used with
-            # Vertex
-            api_key="dummy",
-            model=model,
-            voice_id=voice_id,
-            start_audio_paused=start_audio_paused,
-            start_video_paused=start_video_paused,
-            system_instruction=system_instruction,
-            tools=tools,
-            params=params,
-            inference_on_context_initialization=inference_on_context_initialization,
-            file_api_base_url=file_api_base_url,
-            http_options=http_options,
-            **kwargs,
-        )
-
-    def create_client(self):
-        """Create the Gemini client instance."""
-        self._client = Client(
-            vertexai=True,
-            credentials=self._credentials,
-            project=self._project_id,
-            location=self._location,
-        )
-
-    @property
-    def file_api(self):
-        """Gemini File API is not supported with Vertex AI."""
-        raise NotImplementedError(
-            "When using Vertex AI, the recommended approach is to use Google Cloud Storage for file handling. The Gemini File API is not directly supported in this context."
-        )
-
-    @staticmethod
-    def _get_credentials(credentials: Optional[str], credentials_path: Optional[str]) -> str:
-        """Retrieve Credentials using Google service account credentials JSON.
-
-        Supports multiple authentication methods:
-        1. Direct JSON credentials string
-        2. Path to service account JSON file
-        3. Default application credentials (ADC)
-
-        Args:
-            credentials: JSON string of service account credentials.
-            credentials_path: Path to the service account JSON file.
-
-        Returns:
-            OAuth token for API authentication.
-
-        Raises:
-            ValueError: If no valid credentials are provided or found.
-        """
-        creds: Optional[service_account.Credentials] = None
-
-        if credentials:
-            # Parse and load credentials from JSON string
-            creds = service_account.Credentials.from_service_account_info(
-                json.loads(credentials),
-                scopes=["https://www.googleapis.com/auth/cloud-platform"],
-            )
-        elif credentials_path:
-            # Load credentials from JSON file
-            creds = service_account.Credentials.from_service_account_file(
-                credentials_path,
-                scopes=["https://www.googleapis.com/auth/cloud-platform"],
-            )
-        else:
-            try:
-                creds, project_id = default(
-                    scopes=["https://www.googleapis.com/auth/cloud-platform"]
-                )
-            except GoogleAuthError:
-                pass
-
-        if not creds:
-            raise ValueError("No valid credentials provided.")
-
-        creds.refresh(Request())  # Ensure token is up-to-date, lifetime is 1 hour.
-
-        return creds
--- a/src/pipecat/services/google/llm_openai.py
+++ b/src/pipecat/services/google/llm_openai.py
@@ -94,9 +94,9 @@ class GoogleLLMOpenAIBetaService(OpenAILLMService):
        async for chunk in chunk_stream:
            if chunk.usage:
                tokens = LLMTokenUsage(
-                    prompt_tokens=chunk.usage.prompt_tokens or 0,
-                    completion_tokens=chunk.usage.completion_tokens or 0,
-                    total_tokens=chunk.usage.total_tokens or 0,
+                    prompt_tokens=chunk.usage.prompt_tokens,
+                    completion_tokens=chunk.usage.completion_tokens,
+                    total_tokens=chunk.usage.total_tokens,
                )
                await self.start_llm_usage_metrics(tokens)

--- a/src/pipecat/services/google/llm_vertex.py
+++ b/src/pipecat/services/google/llm_vertex.py
@@ -53,44 +53,12 @@ class GoogleVertexLLMService(OpenAILLMService):

        Parameters:
            location: GCP region for Vertex AI endpoint (e.g., "us-east4").
-
-                .. deprecated:: 0.0.90
-                    Use `location` as a direct argument to
-                    `GoogleVertexLLMService.__init__()` instead.
-
            project_id: Google Cloud project ID.
-
-                .. deprecated:: 0.0.90
-                    Use `project_id` as a direct argument to
-                    `GoogleVertexLLMService.__init__()` instead.
        """

        # https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations
-        location: Optional[str] = None
-        project_id: Optional[str] = None
-
-        def __init__(self, **kwargs):
-            """Initializes the InputParams."""
-            import warnings
-
-            with warnings.catch_warnings():
-                warnings.simplefilter("always")
-                if "location" in kwargs and kwargs["location"] is not None:
-                    warnings.warn(
-                        "GoogleVertexLLMService.InputParams.location is deprecated. "
-                        "Please provide 'location' as a direct argument to GoogleVertexLLMService.__init__() instead.",
-                        DeprecationWarning,
-                        stacklevel=2,
-                    )
-
-                if "project_id" in kwargs and kwargs["project_id"] is not None:
-                    warnings.warn(
-                        "GoogleVertexLLMService.InputParams.project_id is deprecated. "
-                        "Please provide 'project_id' as a direct argument to GoogleVertexLLMService.__init__() instead.",
-                        DeprecationWarning,
-                        stacklevel=2,
-                    )
-            super().__init__(**kwargs)
+        location: str = "us-east4"
+        project_id: str

    def __init__(
        self,
@@ -98,8 +66,7 @@ class GoogleVertexLLMService(OpenAILLMService):
        credentials: Optional[str] = None,
        credentials_path: Optional[str] = None,
        model: str = "google/gemini-2.0-flash-001",
-        location: Optional[str] = None,
-        project_id: Optional[str] = None,
+        params: Optional[InputParams] = None,
        **kwargs,
    ):
        """Initializes the VertexLLMService.
@@ -108,60 +75,33 @@ class GoogleVertexLLMService(OpenAILLMService):
            credentials: JSON string of service account credentials.
            credentials_path: Path to the service account JSON file.
            model: Model identifier (e.g., "google/gemini-2.0-flash-001").
-            location: GCP region for Vertex AI endpoint (e.g., "us-east4").
-            project_id: Google Cloud project ID.
+            params: Vertex AI input parameters including location and project.
            **kwargs: Additional arguments passed to OpenAILLMService.
        """
-        # Handle deprecated InputParams fields
-        if "params" in kwargs and isinstance(kwargs["params"], GoogleVertexLLMService.InputParams):
-            params = kwargs["params"]
-            # Extract location and project_id from params if not provided
-            # directly, for backward compatibility
-            if project_id is None:
-                project_id = params.project_id
-            if location is None:
-                location = params.location
-            # Convert to base InputParams
-            params = OpenAILLMService.InputParams(
-                **params.model_dump(exclude={"location", "project_id"}, exclude_unset=True)
-            )
-            kwargs["params"] = params
-
-        # Validate project_id and location parameters
-        # NOTE: once we remove Vertex-spcific InputParams class, we can update
-        #       __init__() signature as follows:
-        #       - location: str = "us-east4",
-        #       - project_id: str,
-        #       But for now, we need them as-is to maintain proper backward
-        #       compatibility.
-        if project_id is None:
-            raise ValueError("project_id is required")
-        if location is None:
-            # If location is not provided, default to "us-east4".
-            # Note: this is legacy behavior; ideally location would be
-            # required.
-            logger.warning("location is not provided. Defaulting to 'us-east4'.")
-            location = "us-east4"  # Default location if not provided
-
-        base_url = self._get_base_url(location, project_id)
+        params = params or OpenAILLMService.InputParams()
+        base_url = self._get_base_url(params)
        self._api_key = self._get_api_token(credentials, credentials_path)

        super().__init__(
            api_key=self._api_key,
            base_url=base_url,
            model=model,
+            params=params,
            **kwargs,
        )

    @staticmethod
-    def _get_base_url(location: str, project_id: str) -> str:
+    def _get_base_url(params: InputParams) -> str:
        """Construct the base URL for Vertex AI API."""
        # Determine the correct API host based on location
-        if location == "global":
+        if params.location == "global":
            api_host = "aiplatform.googleapis.com"
        else:
-            api_host = f"{location}-aiplatform.googleapis.com"
-        return f"https://{api_host}/v1/projects/{project_id}/locations/{location}/endpoints/openapi"
+            api_host = f"{params.location}-aiplatform.googleapis.com"
+        return (
+            f"https://{api_host}/v1/"
+            f"projects/{params.project_id}/locations/{params.location}/endpoints/openapi"
+        )

    @staticmethod
    def _get_api_token(credentials: Optional[str], credentials_path: Optional[str]) -> str:
--- a/src/pipecat/services/hume/tts.py
+++ b/src/pipecat/services/hume/tts.py
@@ -42,7 +42,7 @@ class HumeTTSService(TTSService):
    """Hume Octave Text-to-Speech service.

    Streams PCM audio via Hume's HTTP output streaming (JSON chunks) endpoint
-    using the Python SDK and emits ``TTSAudioRawFrame`` frames suitable for Pipecat transports.
+    using the Python SDK and emits `TTSAudioRawFrame`s suitable for Pipecat transports.

    Supported features:

@@ -78,7 +78,7 @@ class HumeTTSService(TTSService):

        Args:
            api_key: Hume API key. If omitted, reads the ``HUME_API_KEY`` environment variable.
-            voice_id: ID of the voice to use. Only voice IDs are supported; voice names are not.
+            voice_id: ID of the voice to use (ID-only; names are not supported here).
            params: Optional synthesis controls (acting instructions, speed, trailing silence).
            sample_rate: Output sample rate for emitted PCM frames. Defaults to 48_000 (Hume).
            **kwargs: Additional arguments passed to the parent class.
--- a/src/pipecat/services/openai/init.py
+++ b/src/pipecat/services/openai/init.py
@@ -10,7 +10,6 @@ from pipecat.services import DeprecatedModuleProxy

 from .image import *
 from .llm import *
-from .realtime import *
 from .stt import *
 from .tts import *

--- a/src/pipecat/services/openai/realtime/init.py
+++ b/src/pipecat/services/openai/realtime/init.py
--- a/src/pipecat/services/openai/realtime/context.py
+++ b/src/pipecat/services/openai/realtime/context.py
@@ -1,272 +0,0 @@
-#
-# Copyright (c) 2024–2025, Daily
-#
-# SPDX-License-Identifier: BSD 2-Clause License
-#
-
-"""OpenAI Realtime LLM context and aggregator implementations."""
-
-import copy
-import json
-
-from loguru import logger
-
-from pipecat.frames.frames import (
-    Frame,
-    FunctionCallResultFrame,
-    InterimTranscriptionFrame,
-    LLMMessagesUpdateFrame,
-    LLMSetToolsFrame,
-    LLMTextFrame,
-    TranscriptionFrame,
-)
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
-from pipecat.processors.frame_processor import FrameDirection
-from pipecat.services.openai.llm import (
-    OpenAIAssistantContextAggregator,
-    OpenAIUserContextAggregator,
-)
-
-from . import events
-from .frames import RealtimeFunctionCallResultFrame, RealtimeMessagesUpdateFrame
-
-
-class OpenAIRealtimeLLMContext(OpenAILLMContext):
-    """OpenAI Realtime LLM context with session management and message conversion.
-
-    Extends the standard OpenAI LLM context to support real-time session properties,
-    instruction management, and conversion between standard message formats and
-    realtime conversation items.
-    """
-
-    def __init__(self, messages=None, tools=None, **kwargs):
-        """Initialize the OpenAIRealtimeLLMContext.
-
-        Args:
-            messages: Initial conversation messages. Defaults to None.
-            tools: Available function tools. Defaults to None.
-            **kwargs: Additional arguments passed to parent OpenAILLMContext.
-        """
-        super().__init__(messages=messages, tools=tools, **kwargs)
-        self.__setup_local()
-
-    def __setup_local(self):
-        self.llm_needs_settings_update = True
-        self.llm_needs_initial_messages = True
-        self._session_instructions = ""
-
-        return
-
-    @staticmethod
-    def upgrade_to_realtime(obj: OpenAILLMContext) -> "OpenAIRealtimeLLMContext":
-        """Upgrade a standard OpenAI LLM context to a realtime context.
-
-        Args:
-            obj: The OpenAILLMContext instance to upgrade.
-
-        Returns:
-            The upgraded OpenAIRealtimeLLMContext instance.
-        """
-        if isinstance(obj, OpenAILLMContext) and not isinstance(obj, OpenAIRealtimeLLMContext):
-            obj.__class__ = OpenAIRealtimeLLMContext
-            obj.__setup_local()
-        return obj
-
-    # todo
-    #   - finish implementing all frames
-
-    def from_standard_message(self, message):
-        """Convert a standard message format to a realtime conversation item.
-
-        Args:
-            message: The standard message dictionary to convert.
-
-        Returns:
-            A ConversationItem instance for the realtime API.
-        """
-        if message.get("role") == "user":
-            content = message.get("content")
-            if isinstance(message.get("content"), list):
-                content = ""
-                for c in message.get("content"):
-                    if c.get("type") == "text":
-                        content += " " + c.get("text")
-                    else:
-                        logger.error(
-                            f"Unhandled content type in context message: {c.get('type')} - {message}"
-                        )
-            return events.ConversationItem(
-                role="user",
-                type="message",
-                content=[events.ItemContent(type="input_text", text=content)],
-            )
-        if message.get("role") == "assistant" and message.get("tool_calls"):
-            tc = message.get("tool_calls")[0]
-            return events.ConversationItem(
-                type="function_call",
-                call_id=tc["id"],
-                name=tc["function"]["name"],
-                arguments=tc["function"]["arguments"],
-            )
-        logger.error(f"Unhandled message type in from_standard_message: {message}")
-
-    def get_messages_for_initializing_history(self):
-        """Get conversation items for initializing the realtime session history.
-
-        Converts the context's messages to a format suitable for the realtime API,
-        handling system instructions and conversation history packaging.
-
-        Returns:
-            List of conversation items for session initialization.
-        """
-        # We can't load a long conversation history into the openai realtime api yet. (The API/model
-        # forgets that it can do audio, if you do a series of `conversation.item.create` calls.) So
-        # our general strategy until this is fixed is just to put everything into a first "user"
-        # message as a single input.
-        if not self.messages:
-            return []
-
-        messages = copy.deepcopy(self.messages)
-
-        # If we have a "system" message as our first message, let's pull that out into session
-        # "instructions"
-        if messages[0].get("role") == "system":
-            self.llm_needs_settings_update = True
-            system = messages.pop(0)
-            content = system.get("content")
-            if isinstance(content, str):
-                self._session_instructions = content
-            elif isinstance(content, list):
-                self._session_instructions = content[0].get("text")
-            if not messages:
-                return []
-
-        # If we have just a single "user" item, we can just send it normally
-        if len(messages) == 1 and messages[0].get("role") == "user":
-            return [self.from_standard_message(messages[0])]
-
-        # Otherwise, let's pack everything into a single "user" message with a bit of
-        # explanation for the LLM
-        intro_text = """
-        This is a previously saved conversation. Please treat this conversation history as a
-        starting point for the current conversation."""
-
-        trailing_text = """
-        This is the end of the previously saved conversation. Please continue the conversation
-        from here. If the last message is a user instruction or question, act on that instruction
-        or answer the question. If the last message is an assistant response, simple say that you
-        are ready to continue the conversation."""
-
-        return [
-            {
-                "role": "user",
-                "type": "message",
-                "content": [
-                    {
-                        "type": "input_text",
-                        "text": "\n\n".join(
-                            [intro_text, json.dumps(messages, indent=2), trailing_text]
-                        ),
-                    }
-                ],
-            }
-        ]
-
-    def add_user_content_item_as_message(self, item):
-        """Add a user content item as a standard message to the context.
-
-        Args:
-            item: The conversation item to add as a user message.
-        """
-        message = {
-            "role": "user",
-            "content": [{"type": "text", "text": item.content[0].transcript}],
-        }
-        self.add_message(message)
-
-
-class OpenAIRealtimeUserContextAggregator(OpenAIUserContextAggregator):
-    """User context aggregator for OpenAI Realtime API.
-
-    Handles user input frames and generates appropriate context updates
-    for the realtime conversation, including message updates and tool settings.
-
-    Args:
-        context: The OpenAI realtime LLM context.
-        **kwargs: Additional arguments passed to parent aggregator.
-    """
-
-    async def process_frame(
-        self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM
-    ):
-        """Process incoming frames and handle realtime-specific frame types.
-
-        Args:
-            frame: The frame to process.
-            direction: The direction of frame flow in the pipeline.
-        """
-        await super().process_frame(frame, direction)
-        # Parent does not push LLMMessagesUpdateFrame. This ensures that in a typical pipeline,
-        # messages are only processed by the user context aggregator, which is generally what we want. But
-        # we also need to send new messages over the websocket, so the openai realtime API has them
-        # in its context.
-        if isinstance(frame, LLMMessagesUpdateFrame):
-            await self.push_frame(RealtimeMessagesUpdateFrame(context=self._context))
-
-        # Parent also doesn't push the LLMSetToolsFrame.
-        if isinstance(frame, LLMSetToolsFrame):
-            await self.push_frame(frame, direction)
-
-    async def push_aggregation(self):
-        """Push user input aggregation.
-
-        Currently ignores all user input coming into the pipeline as realtime
-        audio input is handled directly by the service.
-        """
-        # for the moment, ignore all user input coming into the pipeline.
-        # todo: think about whether/how to fix this to allow for text input from
-        #       upstream (transport/transcription, or other sources)
-        pass
-
-
-class OpenAIRealtimeAssistantContextAggregator(OpenAIAssistantContextAggregator):
-    """Assistant context aggregator for OpenAI Realtime API.
-
-    Handles assistant output frames from the realtime service, filtering
-    out duplicate text frames and managing function call results.
-
-    Args:
-        context: The OpenAI realtime LLM context.
-        **kwargs: Additional arguments passed to parent aggregator.
-    """
-
-    # The LLMAssistantContextAggregator uses TextFrames to aggregate the LLM output,
-    # but the OpenAIRealtimeLLMService pushes LLMTextFrames and TTSTextFrames. We
-    # need to override this proces_frame for LLMTextFrame, so that only the TTSTextFrames
-    # are process. This ensures that the context gets only one set of messages.
-    # OpenAIRealtimeLLMService also pushes TranscriptionFrames and InterimTranscriptionFrames,
-    # so we need to ignore pushing those as well, as they're also TextFrames.
-    async def process_frame(self, frame: Frame, direction: FrameDirection):
-        """Process assistant frames, filtering out duplicate text content.
-
-        Args:
-            frame: The frame to process.
-            direction: The direction of frame flow in the pipeline.
-        """
-        if not isinstance(frame, (LLMTextFrame, TranscriptionFrame, InterimTranscriptionFrame)):
-            await super().process_frame(frame, direction)
-
-    async def handle_function_call_result(self, frame: FunctionCallResultFrame):
-        """Handle function call result and notify the realtime service.
-
-        Args:
-            frame: The function call result frame to handle.
-        """
-        await super().handle_function_call_result(frame)
-
-        # The standard function callback code path pushes the FunctionCallResultFrame from the llm itself,
-        # so we didn't have a chance to add the result to the openai realtime api context. Let's push a
-        # special frame to do that.
-        await self.push_frame(
-            RealtimeFunctionCallResultFrame(result_frame=frame), FrameDirection.UPSTREAM
-        )
--- a/src/pipecat/services/openai/realtime/events.py
+++ b/src/pipecat/services/openai/realtime/events.py
--- a/src/pipecat/services/openai/realtime/frames.py
+++ b/src/pipecat/services/openai/realtime/frames.py
@@ -1,37 +0,0 @@
-#
-# Copyright (c) 2024–2025, Daily
-#
-# SPDX-License-Identifier: BSD 2-Clause License
-#
-
-"""Custom frame types for OpenAI Realtime API integration."""
-
-from dataclasses import dataclass
-from typing import TYPE_CHECKING
-
-from pipecat.frames.frames import DataFrame, FunctionCallResultFrame
-
-if TYPE_CHECKING:
-    from pipecat.services.openai.realtime.context import OpenAIRealtimeLLMContext
-
-
-@dataclass
-class RealtimeMessagesUpdateFrame(DataFrame):
-    """Frame indicating that the realtime context messages have been updated.
-
-    Parameters:
-        context: The updated OpenAI realtime LLM context.
-    """
-
-    context: "OpenAIRealtimeLLMContext"
-
-
-@dataclass
-class RealtimeFunctionCallResultFrame(DataFrame):
-    """Frame containing function call results for the realtime service.
-
-    Parameters:
-        result_frame: The function call result frame to send to the realtime API.
-    """
-
-    result_frame: FunctionCallResultFrame
--- a/src/pipecat/services/openai_realtime/init.py
+++ b/src/pipecat/services/openai_realtime/init.py
@@ -1,27 +1,9 @@
-#
-# Copyright (c) 2025, Daily
-#
-# SPDX-License-Identifier: BSD 2-Clause License
-#
-
-import warnings
-
-from pipecat.services.azure.realtime.llm import AzureRealtimeLLMService
-from pipecat.services.openai.realtime.events import (
+from .azure import AzureRealtimeLLMService
+from .events import (
    InputAudioNoiseReduction,
    InputAudioTranscription,
    SemanticTurnDetection,
    SessionProperties,
    TurnDetection,
 )
-from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService
-
-with warnings.catch_warnings():
-    warnings.simplefilter("always")
-    warnings.warn(
-        "Types in pipecat.services.openai_realtime are deprecated. "
-        "Please use the equivalent types from "
-        "pipecat.services.openai.realtime instead.",
-        DeprecationWarning,
-        stacklevel=2,
-    )
+from .openai import OpenAIRealtimeLLMService
--- a/src/pipecat/services/openai_realtime/azure.py
+++ b/src/pipecat/services/openai_realtime/azure.py
@@ -1,21 +1,67 @@
 #
-# Copyright (c) 2025, Daily
+# Copyright (c) 2024–2025, Daily
 #
 # SPDX-License-Identifier: BSD 2-Clause License
 #

 """Azure OpenAI Realtime LLM service implementation."""

-import warnings
+from loguru import logger

-from pipecat.services.azure.realtime.llm import *
+from .openai import OpenAIRealtimeLLMService

-with warnings.catch_warnings():
-    warnings.simplefilter("always")
-    warnings.warn(
-        "Types in pipecat.services.openai_realtime.azure are deprecated. "
-        "Please use the equivalent types from "
-        "pipecat.services.azure.realtime.llm instead.",
-        DeprecationWarning,
-        stacklevel=2,
+try:
+    from websockets.asyncio.client import connect as websocket_connect
+except ModuleNotFoundError as e:
+    logger.error(f"Exception: {e}")
+    logger.error(
+        "In order to use OpenAI, you need to `pip install pipecat-ai[openai]`. Also, set `OPENAI_API_KEY` environment variable."
    )
+    raise Exception(f"Missing module: {e}")
+
+
+class AzureRealtimeLLMService(OpenAIRealtimeLLMService):
+    """Azure OpenAI Realtime LLM service with Azure-specific authentication.
+
+    Extends the OpenAI Realtime service to work with Azure OpenAI endpoints,
+    using Azure's authentication headers and endpoint format. Provides the same
+    real-time audio and text communication capabilities as the base OpenAI service.
+    """
+
+    def __init__(
+        self,
+        *,
+        api_key: str,
+        base_url: str,
+        **kwargs,
+    ):
+        """Initialize Azure Realtime LLM service.
+
+        Args:
+            api_key: The API key for the Azure OpenAI service.
+            base_url: The full Azure WebSocket endpoint URL including api-version and deployment.
+                Example: "wss://my-project.openai.azure.com/openai/realtime?api-version=2024-10-01-preview&deployment=my-realtime-deployment"
+            **kwargs: Additional arguments passed to parent OpenAIRealtimeLLMService.
+        """
+        super().__init__(base_url=base_url, api_key=api_key, **kwargs)
+        self.api_key = api_key
+        self.base_url = base_url
+
+    async def _connect(self):
+        try:
+            if self._websocket:
+                # Here we assume that if we have a websocket, we are connected. We
+                # handle disconnections in the send/recv code paths.
+                return
+
+            logger.info(f"Connecting to {self.base_url}, api key: {self.api_key}")
+            self._websocket = await websocket_connect(
+                uri=self.base_url,
+                additional_headers={
+                    "api-key": self.api_key,
+                },
+            )
+            self._receive_task = self.create_task(self._receive_task_handler())
+        except Exception as e:
+            logger.error(f"{self} initialization error: {e}")
+            self._websocket = None
--- a/src/pipecat/services/openai_realtime/context.py
+++ b/src/pipecat/services/openai_realtime/context.py
@@ -1,21 +1,272 @@
 #
-# Copyright (c) 2025, Daily
+# Copyright (c) 2024–2025, Daily
 #
 # SPDX-License-Identifier: BSD 2-Clause License
 #

 """OpenAI Realtime LLM context and aggregator implementations."""

-import warnings
+import copy
+import json

-from pipecat.services.openai.realtime.context import *
+from loguru import logger

-with warnings.catch_warnings():
-    warnings.simplefilter("always")
-    warnings.warn(
-        "Types in pipecat.services.openai_realtime.context are deprecated. "
-        "Please use the equivalent types from "
-        "pipecat.services.openai.realtime.context instead.",
-        DeprecationWarning,
-        stacklevel=2,
-    )
+from pipecat.frames.frames import (
+    Frame,
+    FunctionCallResultFrame,
+    InterimTranscriptionFrame,
+    LLMMessagesUpdateFrame,
+    LLMSetToolsFrame,
+    LLMTextFrame,
+    TranscriptionFrame,
+)
+from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.processors.frame_processor import FrameDirection
+from pipecat.services.openai.llm import (
+    OpenAIAssistantContextAggregator,
+    OpenAIUserContextAggregator,
+)
+
+from . import events
+from .frames import RealtimeFunctionCallResultFrame, RealtimeMessagesUpdateFrame
+
+
+class OpenAIRealtimeLLMContext(OpenAILLMContext):
+    """OpenAI Realtime LLM context with session management and message conversion.
+
+    Extends the standard OpenAI LLM context to support real-time session properties,
+    instruction management, and conversion between standard message formats and
+    realtime conversation items.
+    """
+
+    def __init__(self, messages=None, tools=None, **kwargs):
+        """Initialize the OpenAIRealtimeLLMContext.
+
+        Args:
+            messages: Initial conversation messages. Defaults to None.
+            tools: Available function tools. Defaults to None.
+            **kwargs: Additional arguments passed to parent OpenAILLMContext.
+        """
+        super().__init__(messages=messages, tools=tools, **kwargs)
+        self.__setup_local()
+
+    def __setup_local(self):
+        self.llm_needs_settings_update = True
+        self.llm_needs_initial_messages = True
+        self._session_instructions = ""
+
+        return
+
+    @staticmethod
+    def upgrade_to_realtime(obj: OpenAILLMContext) -> "OpenAIRealtimeLLMContext":
+        """Upgrade a standard OpenAI LLM context to a realtime context.
+
+        Args:
+            obj: The OpenAILLMContext instance to upgrade.
+
+        Returns:
+            The upgraded OpenAIRealtimeLLMContext instance.
+        """
+        if isinstance(obj, OpenAILLMContext) and not isinstance(obj, OpenAIRealtimeLLMContext):
+            obj.__class__ = OpenAIRealtimeLLMContext
+            obj.__setup_local()
+        return obj
+
+    # todo
+    #   - finish implementing all frames
+
+    def from_standard_message(self, message):
+        """Convert a standard message format to a realtime conversation item.
+
+        Args:
+            message: The standard message dictionary to convert.
+
+        Returns:
+            A ConversationItem instance for the realtime API.
+        """
+        if message.get("role") == "user":
+            content = message.get("content")
+            if isinstance(message.get("content"), list):
+                content = ""
+                for c in message.get("content"):
+                    if c.get("type") == "text":
+                        content += " " + c.get("text")
+                    else:
+                        logger.error(
+                            f"Unhandled content type in context message: {c.get('type')} - {message}"
+                        )
+            return events.ConversationItem(
+                role="user",
+                type="message",
+                content=[events.ItemContent(type="input_text", text=content)],
+            )
+        if message.get("role") == "assistant" and message.get("tool_calls"):
+            tc = message.get("tool_calls")[0]
+            return events.ConversationItem(
+                type="function_call",
+                call_id=tc["id"],
+                name=tc["function"]["name"],
+                arguments=tc["function"]["arguments"],
+            )
+        logger.error(f"Unhandled message type in from_standard_message: {message}")
+
+    def get_messages_for_initializing_history(self):
+        """Get conversation items for initializing the realtime session history.
+
+        Converts the context's messages to a format suitable for the realtime API,
+        handling system instructions and conversation history packaging.
+
+        Returns:
+            List of conversation items for session initialization.
+        """
+        # We can't load a long conversation history into the openai realtime api yet. (The API/model
+        # forgets that it can do audio, if you do a series of `conversation.item.create` calls.) So
+        # our general strategy until this is fixed is just to put everything into a first "user"
+        # message as a single input.
+        if not self.messages:
+            return []
+
+        messages = copy.deepcopy(self.messages)
+
+        # If we have a "system" message as our first message, let's pull that out into session
+        # "instructions"
+        if messages[0].get("role") == "system":
+            self.llm_needs_settings_update = True
+            system = messages.pop(0)
+            content = system.get("content")
+            if isinstance(content, str):
+                self._session_instructions = content
+            elif isinstance(content, list):
+                self._session_instructions = content[0].get("text")
+            if not messages:
+                return []
+
+        # If we have just a single "user" item, we can just send it normally
+        if len(messages) == 1 and messages[0].get("role") == "user":
+            return [self.from_standard_message(messages[0])]
+
+        # Otherwise, let's pack everything into a single "user" message with a bit of
+        # explanation for the LLM
+        intro_text = """
+        This is a previously saved conversation. Please treat this conversation history as a
+        starting point for the current conversation."""
+
+        trailing_text = """
+        This is the end of the previously saved conversation. Please continue the conversation
+        from here. If the last message is a user instruction or question, act on that instruction
+        or answer the question. If the last message is an assistant response, simple say that you
+        are ready to continue the conversation."""
+
+        return [
+            {
+                "role": "user",
+                "type": "message",
+                "content": [
+                    {
+                        "type": "input_text",
+                        "text": "\n\n".join(
+                            [intro_text, json.dumps(messages, indent=2), trailing_text]
+                        ),
+                    }
+                ],
+            }
+        ]
+
+    def add_user_content_item_as_message(self, item):
+        """Add a user content item as a standard message to the context.
+
+        Args:
+            item: The conversation item to add as a user message.
+        """
+        message = {
+            "role": "user",
+            "content": [{"type": "text", "text": item.content[0].transcript}],
+        }
+        self.add_message(message)
+
+
+class OpenAIRealtimeUserContextAggregator(OpenAIUserContextAggregator):
+    """User context aggregator for OpenAI Realtime API.
+
+    Handles user input frames and generates appropriate context updates
+    for the realtime conversation, including message updates and tool settings.
+
+    Args:
+        context: The OpenAI realtime LLM context.
+        **kwargs: Additional arguments passed to parent aggregator.
+    """
+
+    async def process_frame(
+        self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM
+    ):
+        """Process incoming frames and handle realtime-specific frame types.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame flow in the pipeline.
+        """
+        await super().process_frame(frame, direction)
+        # Parent does not push LLMMessagesUpdateFrame. This ensures that in a typical pipeline,
+        # messages are only processed by the user context aggregator, which is generally what we want. But
+        # we also need to send new messages over the websocket, so the openai realtime API has them
+        # in its context.
+        if isinstance(frame, LLMMessagesUpdateFrame):
+            await self.push_frame(RealtimeMessagesUpdateFrame(context=self._context))
+
+        # Parent also doesn't push the LLMSetToolsFrame.
+        if isinstance(frame, LLMSetToolsFrame):
+            await self.push_frame(frame, direction)
+
+    async def push_aggregation(self):
+        """Push user input aggregation.
+
+        Currently ignores all user input coming into the pipeline as realtime
+        audio input is handled directly by the service.
+        """
+        # for the moment, ignore all user input coming into the pipeline.
+        # todo: think about whether/how to fix this to allow for text input from
+        #       upstream (transport/transcription, or other sources)
+        pass
+
+
+class OpenAIRealtimeAssistantContextAggregator(OpenAIAssistantContextAggregator):
+    """Assistant context aggregator for OpenAI Realtime API.
+
+    Handles assistant output frames from the realtime service, filtering
+    out duplicate text frames and managing function call results.
+
+    Args:
+        context: The OpenAI realtime LLM context.
+        **kwargs: Additional arguments passed to parent aggregator.
+    """
+
+    # The LLMAssistantContextAggregator uses TextFrames to aggregate the LLM output,
+    # but the OpenAIRealtimeLLMService pushes LLMTextFrames and TTSTextFrames. We
+    # need to override this proces_frame for LLMTextFrame, so that only the TTSTextFrames
+    # are process. This ensures that the context gets only one set of messages.
+    # OpenAIRealtimeLLMService also pushes TranscriptionFrames and InterimTranscriptionFrames,
+    # so we need to ignore pushing those as well, as they're also TextFrames.
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process assistant frames, filtering out duplicate text content.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame flow in the pipeline.
+        """
+        if not isinstance(frame, (LLMTextFrame, TranscriptionFrame, InterimTranscriptionFrame)):
+            await super().process_frame(frame, direction)
+
+    async def handle_function_call_result(self, frame: FunctionCallResultFrame):
+        """Handle function call result and notify the realtime service.
+
+        Args:
+            frame: The function call result frame to handle.
+        """
+        await super().handle_function_call_result(frame)
+
+        # The standard function callback code path pushes the FunctionCallResultFrame from the llm itself,
+        # so we didn't have a chance to add the result to the openai realtime api context. Let's push a
+        # special frame to do that.
+        await self.push_frame(
+            RealtimeFunctionCallResultFrame(result_frame=frame), FrameDirection.UPSTREAM
+        )
--- a/src/pipecat/services/openai_realtime/events.py
+++ b/src/pipecat/services/openai_realtime/events.py
--- a/src/pipecat/services/openai_realtime/frames.py
+++ b/src/pipecat/services/openai_realtime/frames.py
@@ -1,21 +1,37 @@
 #
-# Copyright (c) 2025, Daily
+# Copyright (c) 2024–2025, Daily
 #
 # SPDX-License-Identifier: BSD 2-Clause License
 #

 """Custom frame types for OpenAI Realtime API integration."""

-import warnings
+from dataclasses import dataclass
+from typing import TYPE_CHECKING

-from pipecat.services.openai.realtime.frames import *
+from pipecat.frames.frames import DataFrame, FunctionCallResultFrame

-with warnings.catch_warnings():
-    warnings.simplefilter("always")
-    warnings.warn(
-        "Types in pipecat.services.openai_realtime.frames are deprecated. "
-        "Please use the equivalent types from "
-        "pipecat.services.openai.realtime.frames instead.",
-        DeprecationWarning,
-        stacklevel=2,
-    )
+if TYPE_CHECKING:
+    from pipecat.services.openai_realtime.context import OpenAIRealtimeLLMContext
+
+
+@dataclass
+class RealtimeMessagesUpdateFrame(DataFrame):
+    """Frame indicating that the realtime context messages have been updated.
+
+    Parameters:
+        context: The updated OpenAI realtime LLM context.
+    """
+
+    context: "OpenAIRealtimeLLMContext"
+
+
+@dataclass
+class RealtimeFunctionCallResultFrame(DataFrame):
+    """Frame containing function call results for the realtime service.
+
+    Parameters:
+        result_frame: The function call result frame to send to the realtime API.
+    """
+
+    result_frame: FunctionCallResultFrame
--- a/src/pipecat/services/openai_realtime/openai.py
+++ b/src/pipecat/services/openai_realtime/openai.py
--- a/src/pipecat/services/piper/tts.py
+++ b/src/pipecat/services/piper/tts.py
@@ -14,6 +14,7 @@ from loguru import logger
 from pipecat.frames.frames import (
    ErrorFrame,
    Frame,
+    TTSAudioRawFrame,
    TTSStartedFrame,
    TTSStoppedFrame,
 )
@@ -98,15 +99,16 @@ class PiperTTSService(TTSService):

                await self.start_tts_usage_metrics(text)

-                yield TTSStartedFrame()
-
                CHUNK_SIZE = self.chunk_size

-                async for frame in self._stream_audio_frames_from_iterator(
-                    response.content.iter_chunked(CHUNK_SIZE), strip_wav_header=True
-                ):
-                    await self.stop_ttfb_metrics()
-                    yield frame
+                yield TTSStartedFrame()
+                async for chunk in response.content.iter_chunked(CHUNK_SIZE):
+                    # remove wav header if present
+                    if chunk.startswith(b"RIFF"):
+                        chunk = chunk[44:]
+                    if len(chunk) > 0:
+                        await self.stop_ttfb_metrics()
+                        yield TTSAudioRawFrame(chunk, self.sample_rate, 1)
        except Exception as e:
            logger.error(f"Error in run_tts: {e}")
            yield ErrorFrame(error=str(e))
--- a/src/pipecat/services/rime/tts.py
+++ b/src/pipecat/services/rime/tts.py
@@ -553,13 +553,15 @@ class RimeHttpTTSService(TTSService):

                CHUNK_SIZE = self.chunk_size

-                async for frame in self._stream_audio_frames_from_iterator(
-                    response.content.iter_chunked(CHUNK_SIZE),
-                    strip_wav_header=need_to_strip_wav_header,
-                ):
-                    await self.stop_ttfb_metrics()
-                    yield frame
+                async for chunk in response.content.iter_chunked(CHUNK_SIZE):
+                    if need_to_strip_wav_header and chunk.startswith(b"RIFF"):
+                        chunk = chunk[44:]
+                        need_to_strip_wav_header = False

+                    if len(chunk) > 0:
+                        await self.stop_ttfb_metrics()
+                        frame = TTSAudioRawFrame(chunk, self.sample_rate, 1)
+                        yield frame
        except Exception as e:
            logger.exception(f"Error generating TTS: {e}")
            yield ErrorFrame(error=f"Rime TTS error: {str(e)}")
--- a/src/pipecat/services/tts_service.py
+++ b/src/pipecat/services/tts_service.py
@@ -8,17 +8,7 @@

 import asyncio
 from abc import abstractmethod
-from typing import (
-    Any,
-    AsyncGenerator,
-    AsyncIterator,
-    Dict,
-    List,
-    Mapping,
-    Optional,
-    Sequence,
-    Tuple,
-)
+from typing import Any, AsyncGenerator, Dict, List, Mapping, Optional, Sequence, Tuple

 from loguru import logger

@@ -384,36 +374,6 @@ class TTSService(AIService):
        ):
            await self._stop_frame_queue.put(frame)

-    async def _stream_audio_frames_from_iterator(
-        self, iterator: AsyncIterator[bytes], *, strip_wav_header: bool
-    ) -> AsyncGenerator[Frame, None]:
-        buffer = bytearray()
-        need_to_strip_wav_header = strip_wav_header
-        async for chunk in iterator:
-            if need_to_strip_wav_header and chunk.startswith(b"RIFF"):
-                chunk = chunk[44:]
-                need_to_strip_wav_header = False
-
-            # Append to current buffer.
-            buffer.extend(chunk)
-
-            # Round to nearest even number.
-            aligned_length = len(buffer) & ~1  # 111111111...11110
-            if aligned_length > 0:
-                aligned_chunk = buffer[:aligned_length]
-                buffer = buffer[aligned_length:]  # keep any leftover byte
-
-                if len(aligned_chunk) > 0:
-                    frame = TTSAudioRawFrame(bytes(aligned_chunk), self.sample_rate, 1)
-                    yield frame
-
-        if len(buffer) > 0:
-            # Make sure we don't need an extra padding byte.
-            if len(buffer) % 2 == 1:
-                buffer.extend(b"\x00")
-            frame = TTSAudioRawFrame(bytes(buffer), self.sample_rate, 1)
-            yield frame
-
    async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
        self._processing_text = False
        await self._text_aggregator.handle_interruption()