Migrate realtime examples to RealtimeServiceModeConfig

Pass realtime_service_mode=RealtimeServiceModeConfig() through every realtime LLM service example (base, async-tool, video, text-output, persistent-context, update-settings, MCP) so context aggregation uses the new realtime-mode semantics instead of relying on local VAD as a workaround. Where examples previously wired SileroVADAnalyzer into LLMUserAggregatorParams to coax turn frames out of services that don't emit them server-side (AWS Nova Sonic, Ultravox, Gemini Live), the local VAD is now removed. realtime_service_mode keeps context writes correct without it, and the Phase 1.5 server-side InterruptionFrame fixes for Nova Sonic and Ultravox keep the bot from talking past the user when they barge in. Transcript-logging event handlers move from on_user_turn_stopped / on_assistant_turn_stopped to on_user_message_added / on_assistant_message_added, which carry the finalized text in realtime mode (the turn-stopped events fire before the message is finalized, so their `content` is None in that mode). For services that don't emit user-turn frames (Gemini Live, AWS Nova Sonic, Ultravox) the example now carries a Tier 1 comment block that spells out which downstream processors won't activate, how to add local VAD if needed, and the caveat that locally-generated turn boundaries are a heuristic that may diverge from server-side ground truth. Adds examples/realtime/realtime-openai-local-vad.py, a new variant of the OpenAI Realtime example that disables OpenAI's server-side turn detection and drives turn boundaries locally — useful when you want a turn analyzer like LocalSmartTurnV3 to decide when the user is done speaking. Server-emitted turn frames are still preferred when available. The Gemini Live local-VAD variant already existed; it's been updated in place rather than rewritten.
2026-05-20 15:51:18 -04:00
parent 20d9bf4af6
commit bff741a647
35 changed files with 537 additions and 158 deletions
--- a/examples/mcp/mcp-streamable-http-gemini-live.py
+++ b/examples/mcp/mcp-streamable-http-gemini-live.py
@@ -11,7 +11,6 @@ from dotenv import load_dotenv
 from loguru import logger
 from mcp.client.session_group import StreamableHttpParameters

-from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.frames.frames import LLMRunFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
@@ -19,7 +18,7 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    LLMContextAggregatorPair,
-    LLMUserAggregatorParams,
+    RealtimeServiceModeConfig,
 )
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
@@ -84,7 +83,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        context = LLMContext([{"role": "user", "content": "Please introduce yourself."}])
        user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
            context,
-            user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+            realtime_service_mode=RealtimeServiceModeConfig(),
        )

        pipeline = Pipeline(
--- a/examples/persistent-context/persistent-context-aws-nova-sonic.py
+++ b/examples/persistent-context/persistent-context-aws-nova-sonic.py
@@ -15,7 +15,6 @@ from loguru import logger

 from pipecat.adapters.schemas.function_schema import FunctionSchema
 from pipecat.adapters.schemas.tools_schema import ToolsSchema
-from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.frames.frames import LLMRunFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
@@ -23,7 +22,7 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    LLMContextAggregatorPair,
-    LLMUserAggregatorParams,
+    RealtimeServiceModeConfig,
 )
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
@@ -241,7 +240,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    context = LLMContext(tools=tools)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
-        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+        realtime_service_mode=RealtimeServiceModeConfig(),
    )

    pipeline = Pipeline(
--- a/examples/persistent-context/persistent-context-grok-realtime.py
+++ b/examples/persistent-context/persistent-context-grok-realtime.py
@@ -33,6 +33,7 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    LLMContextAggregatorPair,
+    RealtimeServiceModeConfig,
 )
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
@@ -203,7 +204,10 @@ Remember, your responses should be short - just one or two sentences usually."""
    llm.register_function("load_conversation", load_conversation)

    context = LLMContext([{"role": "developer", "content": "Say hello!"}], tools)
-    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context)
+    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
+        context,
+        realtime_service_mode=RealtimeServiceModeConfig(),
+    )

    pipeline = Pipeline(
        [
--- a/examples/persistent-context/persistent-context-openai-realtime.py
+++ b/examples/persistent-context/persistent-context-openai-realtime.py
@@ -15,7 +15,6 @@ from loguru import logger

 from pipecat.adapters.schemas.function_schema import FunctionSchema
 from pipecat.adapters.schemas.tools_schema import ToolsSchema
-from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.frames.frames import LLMRunFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
@@ -23,7 +22,7 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    LLMContextAggregatorPair,
-    LLMUserAggregatorParams,
+    RealtimeServiceModeConfig,
 )
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
@@ -217,7 +216,7 @@ Remember, your responses should be short. Just one or two sentences, usually."""
    context = LLMContext([{"role": "developer", "content": "Say hello!"}], tools)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
-        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+        realtime_service_mode=RealtimeServiceModeConfig(),
    )

    pipeline = Pipeline(
--- a/examples/realtime/realtime-aws-nova-sonic-async-tool.py
+++ b/examples/realtime/realtime-aws-nova-sonic-async-tool.py
@@ -24,7 +24,6 @@ from loguru import logger

 from pipecat.adapters.schemas.function_schema import FunctionSchema
 from pipecat.adapters.schemas.tools_schema import ToolsSchema
-from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.frames.frames import LLMRunFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
@@ -32,7 +31,7 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    LLMContextAggregatorPair,
-    LLMUserAggregatorParams,
+    RealtimeServiceModeConfig,
 )
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
@@ -133,7 +132,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    context = LLMContext(tools=tools)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
-        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+        realtime_service_mode=RealtimeServiceModeConfig(),
    )

    pipeline = Pipeline(
--- a/examples/realtime/realtime-aws-nova-sonic.py
+++ b/examples/realtime/realtime-aws-nova-sonic.py
@@ -15,7 +15,6 @@ from loguru import logger

 from pipecat.adapters.schemas.function_schema import FunctionSchema
 from pipecat.adapters.schemas.tools_schema import ToolsSchema
-from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.frames.frames import LLMRunFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
@@ -24,7 +23,7 @@ from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    AssistantTurnStoppedMessage,
    LLMContextAggregatorPair,
-    LLMUserAggregatorParams,
+    RealtimeServiceModeConfig,
    UserTurnStoppedMessage,
 )
 from pipecat.runner.types import RunnerArguments
@@ -148,10 +147,25 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    llm.register_function("get_current_weather", fetch_weather_from_api)

    # Set up context and context management.
+    #
+    # AWS Nova Sonic drives the conversation server-side. It does NOT emit
+    # UserStartedSpeakingFrame / UserStoppedSpeakingFrame, so pipeline
+    # processors that depend on those frames — RTVI client speech events,
+    # TurnTrackingObserver, AudioBufferProcessor turn recording,
+    # UserIdleController, user mute strategies, voicemail detector — won't
+    # activate with the default server-VAD-only setup. Context aggregation
+    # still works with realtime_service_mode.
+    #
+    # To produce these frames locally, wire a VAD analyzer (e.g.
+    # SileroVADAnalyzer) into LLMUserAggregatorParams. Caveat: locally-
+    # generated turn boundaries are a heuristic and may not match Nova
+    # Sonic's server-side turn decisions, which is what drives the
+    # conversation; the two can drift apart in subtle ways especially
+    # around interruptions and overlapping speech.
    context = LLMContext(tools=tools)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
-        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+        realtime_service_mode=RealtimeServiceModeConfig(),
    )

    # Build the pipeline
@@ -195,14 +209,18 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        logger.info(f"Client disconnected")
        await task.cancel()

-    @user_aggregator.event_handler("on_user_turn_stopped")
-    async def on_user_turn_stopped(aggregator, strategy, message: UserTurnStoppedMessage):
+    # Nova Sonic doesn't emit user-turn frames so on_user_turn_stopped
+    # would never fire. The *_message_added events fire when messages are
+    # written to context and carry the finalized content; use those for
+    # transcript logging.
+    @user_aggregator.event_handler("on_user_message_added")
+    async def on_user_message_added(aggregator, message: UserTurnStoppedMessage):
        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
        line = f"{timestamp}user: {message.content}"
        logger.info(f"Transcript: {line}")

-    @assistant_aggregator.event_handler("on_assistant_turn_stopped")
-    async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage):
+    @assistant_aggregator.event_handler("on_assistant_message_added")
+    async def on_assistant_message_added(aggregator, message: AssistantTurnStoppedMessage):
        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
        line = f"{timestamp}assistant: {message.content}"
        logger.info(f"Transcript: {line}")
--- a/examples/realtime/realtime-azure-async-tool.py
+++ b/examples/realtime/realtime-azure-async-tool.py
@@ -24,7 +24,6 @@ from loguru import logger

 from pipecat.adapters.schemas.function_schema import FunctionSchema
 from pipecat.adapters.schemas.tools_schema import ToolsSchema
-from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.frames.frames import LLMRunFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
@@ -32,7 +31,7 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    LLMContextAggregatorPair,
-    LLMUserAggregatorParams,
+    RealtimeServiceModeConfig,
 )
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
@@ -144,7 +143,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    context = LLMContext(tools=tools)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
-        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+        realtime_service_mode=RealtimeServiceModeConfig(),
    )

    pipeline = Pipeline(
--- a/examples/realtime/realtime-azure.py
+++ b/examples/realtime/realtime-azure.py
@@ -13,7 +13,6 @@ from loguru import logger

 from pipecat.adapters.schemas.function_schema import FunctionSchema
 from pipecat.adapters.schemas.tools_schema import ToolsSchema
-from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.frames.frames import LLMRunFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
@@ -21,7 +20,7 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    LLMContextAggregatorPair,
-    LLMUserAggregatorParams,
+    RealtimeServiceModeConfig,
 )
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
@@ -174,7 +173,7 @@ Remember, your responses should be short. Just one or two sentences, usually. Re

    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
-        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+        realtime_service_mode=RealtimeServiceModeConfig(),
    )

    pipeline = Pipeline(
--- a/examples/realtime/realtime-gemini-live-async-tool.py
+++ b/examples/realtime/realtime-gemini-live-async-tool.py
@@ -28,7 +28,10 @@ from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.processors.aggregators.llm_response_universal import (
+    LLMContextAggregatorPair,
+    RealtimeServiceModeConfig,
+)
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
 from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
@@ -125,7 +128,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    context = LLMContext()
    # Server-side VAD is enabled by default; no local VAD is added.
-    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context)
+    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
+        context,
+        realtime_service_mode=RealtimeServiceModeConfig(),
+    )

    pipeline = Pipeline(
        [
--- a/examples/realtime/realtime-gemini-live-files-api.py
+++ b/examples/realtime/realtime-gemini-live-files-api.py
@@ -15,7 +15,10 @@ from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.processors.aggregators.llm_response_universal import (
+    LLMContextAggregatorPair,
+    RealtimeServiceModeConfig,
+)
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
 from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
@@ -158,7 +161,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        )

    # Server-side VAD is enabled by default; no local VAD is added.
-    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context)
+    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
+        context,
+        realtime_service_mode=RealtimeServiceModeConfig(),
+    )

    # Build the pipeline
    pipeline = Pipeline(
--- a/examples/realtime/realtime-gemini-live-google-search.py
+++ b/examples/realtime/realtime-gemini-live-google-search.py
@@ -15,7 +15,10 @@ from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.processors.aggregators.llm_response_universal import (
+    LLMContextAggregatorPair,
+    RealtimeServiceModeConfig,
+)
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
 from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
@@ -84,7 +87,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        ],
    )
    # Server-side VAD is enabled by default; no local VAD is added.
-    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context)
+    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
+        context,
+        realtime_service_mode=RealtimeServiceModeConfig(),
+    )

    pipeline = Pipeline(
        [
--- a/examples/realtime/realtime-gemini-live-graceful-end.py
+++ b/examples/realtime/realtime-gemini-live-graceful-end.py
@@ -17,7 +17,10 @@ from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.processors.aggregators.llm_response_universal import (
+    LLMContextAggregatorPair,
+    RealtimeServiceModeConfig,
+)
 from pipecat.processors.frame_processor import FrameDirection
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
@@ -148,7 +151,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        [{"role": "developer", "content": "Say hello."}],
    )
    # Server-side VAD is enabled by default; no local VAD is added.
-    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context)
+    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
+        context,
+        realtime_service_mode=RealtimeServiceModeConfig(),
+    )

    pipeline = Pipeline(
        [
--- a/examples/realtime/realtime-gemini-live-grounding-metadata.py
+++ b/examples/realtime/realtime-gemini-live-grounding-metadata.py
@@ -9,7 +9,10 @@ from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.processors.aggregators.llm_response_universal import (
+    LLMContextAggregatorPair,
+    RealtimeServiceModeConfig,
+)
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
@@ -115,7 +118,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    # Set up conversation context and management
    context = LLMContext()
    # Server-side VAD is enabled by default; no local VAD is added.
-    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context)
+    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
+        context,
+        realtime_service_mode=RealtimeServiceModeConfig(),
+    )

    pipeline = Pipeline(
        [
--- a/examples/realtime/realtime-gemini-live-local-vad.py
+++ b/examples/realtime/realtime-gemini-live-local-vad.py
@@ -4,6 +4,29 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #

+"""Gemini Live with locally-driven turn detection.
+
+By default Gemini Live drives the conversation with its own server-side VAD
+(see `realtime-gemini-live.py`). That setup doesn't surface
+``UserStartedSpeakingFrame`` / ``UserStoppedSpeakingFrame``, so pipeline
+processors that depend on those frames (RTVI client speech events,
+``TurnTrackingObserver``, ``AudioBufferProcessor`` turn recording,
+``UserIdleController``, user mute strategies, voicemail detector) don't
+activate.
+
+This variant disables Gemini Live's server-side VAD
+(``GeminiVADParams(disabled=True)``) and instead drives turn boundaries
+locally with ``SileroVADAnalyzer`` wired into the user aggregator. Use this
+variant if you need those downstream processors, or if you want a turn
+analyzer like ``LocalSmartTurnV3`` to decide when the user is done speaking.
+
+Caveat: locally-generated turn boundaries are a heuristic and may not match
+the provider's actual server-side turn decisions, which is what really
+drives the conversation. The two can drift apart in subtle, hard-to-debug
+ways, especially around interruptions and overlapping speech. Prefer
+server-emitted turn frames (i.e. the base `realtime-gemini-live.py` example)
+unless you have a specific reason to drive turn detection locally.
+"""

 import os

@@ -20,6 +43,7 @@ from pipecat.processors.aggregators.llm_response_universal import (
    AssistantTurnStoppedMessage,
    LLMContextAggregatorPair,
    LLMUserAggregatorParams,
+    RealtimeServiceModeConfig,
    UserTurnStoppedMessage,
 )
 from pipecat.runner.types import RunnerArguments
@@ -72,6 +96,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    )
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
+        realtime_service_mode=RealtimeServiceModeConfig(),
        user_params=LLMUserAggregatorParams(
            vad_analyzer=SileroVADAnalyzer(),
        ),
@@ -107,14 +132,17 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        logger.info(f"Client disconnected")
        await task.cancel()

-    @user_aggregator.event_handler("on_user_turn_stopped")
-    async def on_user_turn_stopped(aggregator, strategy, message: UserTurnStoppedMessage):
+    # The *_message_added events fire when messages are written to context
+    # and carry the finalized content. In realtime mode the turn-stopped
+    # events fire before the message text is finalized.
+    @user_aggregator.event_handler("on_user_message_added")
+    async def on_user_message_added(aggregator, message: UserTurnStoppedMessage):
        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
        line = f"{timestamp}user: {message.content}"
        logger.info(f"Transcript: {line}")

-    @assistant_aggregator.event_handler("on_assistant_turn_stopped")
-    async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage):
+    @assistant_aggregator.event_handler("on_assistant_message_added")
+    async def on_assistant_message_added(aggregator, message: AssistantTurnStoppedMessage):
        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
        line = f"{timestamp}assistant: {message.content}"
        logger.info(f"Transcript: {line}")
--- a/examples/realtime/realtime-gemini-live-vertex.py
+++ b/examples/realtime/realtime-gemini-live-vertex.py
@@ -18,7 +18,10 @@ from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.processors.aggregators.llm_response_universal import (
+    LLMContextAggregatorPair,
+    RealtimeServiceModeConfig,
+)
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
 from pipecat.services.google.gemini_live.vertex.llm import GeminiLiveVertexLLMService
@@ -124,7 +127,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    context = LLMContext([{"role": "developer", "content": "Say hello."}])
    # Server-side VAD is enabled by default; no local VAD is added.
-    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context)
+    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
+        context,
+        realtime_service_mode=RealtimeServiceModeConfig(),
+    )

    pipeline = Pipeline(
        [
--- a/examples/realtime/realtime-gemini-live-video.py
+++ b/examples/realtime/realtime-gemini-live-video.py
@@ -16,7 +16,10 @@ from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.processors.aggregators.llm_response_universal import (
+    LLMContextAggregatorPair,
+    RealtimeServiceModeConfig,
+)
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import (
    create_transport,
@@ -64,7 +67,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        ],
    )
    # Server-side VAD is enabled by default; no local VAD is added.
-    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context)
+    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
+        context,
+        realtime_service_mode=RealtimeServiceModeConfig(),
+    )

    pipeline = Pipeline(
        [
--- a/examples/realtime/realtime-gemini-live.py
+++ b/examples/realtime/realtime-gemini-live.py
@@ -21,6 +21,7 @@ from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    AssistantTurnStoppedMessage,
    LLMContextAggregatorPair,
+    RealtimeServiceModeConfig,
    UserTurnStoppedMessage,
 )
 from pipecat.runner.types import RunnerArguments
@@ -130,8 +131,23 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation)

    context = LLMContext()
-    # Server-side VAD is enabled by default; no local VAD is added.
-    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context)
+    # Gemini Live drives the conversation server-side. It does NOT emit
+    # UserStartedSpeakingFrame / UserStoppedSpeakingFrame, so pipeline
+    # processors that depend on those frames — RTVI client speech events,
+    # TurnTrackingObserver, AudioBufferProcessor turn recording,
+    # UserIdleController, user mute strategies, voicemail detector — won't
+    # activate with the default server-VAD-only setup. Context aggregation
+    # still works with realtime_service_mode.
+    #
+    # To produce these frames locally, see `realtime-gemini-live-local-vad.py`.
+    # Caveat: locally-generated turn boundaries are a heuristic and may not
+    # match Gemini Live's server-side turn decisions, which is what drives the
+    # conversation; the two can drift apart in subtle ways especially around
+    # interruptions and overlapping speech.
+    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
+        context,
+        realtime_service_mode=RealtimeServiceModeConfig(),
+    )

    pipeline = Pipeline(
        [
@@ -166,14 +182,19 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        logger.info(f"Client disconnected")
        await task.cancel()

-    @user_aggregator.event_handler("on_user_turn_stopped")
-    async def on_user_turn_stopped(aggregator, strategy, message: UserTurnStoppedMessage):
+    # Gemini Live doesn't emit user-turn frames so on_user_turn_stopped
+    # would never fire. The *_message_added events fire when messages are
+    # written to context and carry the finalized content; use those for
+    # transcript logging regardless of whether the service emits turn
+    # frames.
+    @user_aggregator.event_handler("on_user_message_added")
+    async def on_user_message_added(aggregator, message: UserTurnStoppedMessage):
        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
        line = f"{timestamp}user: {message.content}"
        logger.info(f"Transcript: {line}")

-    @assistant_aggregator.event_handler("on_assistant_turn_stopped")
-    async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage):
+    @assistant_aggregator.event_handler("on_assistant_message_added")
+    async def on_assistant_message_added(aggregator, message: AssistantTurnStoppedMessage):
        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
        line = f"{timestamp}assistant: {message.content}"
        logger.info(f"Transcript: {line}")
--- a/examples/realtime/realtime-grok-async-tool.py
+++ b/examples/realtime/realtime-grok-async-tool.py
@@ -29,7 +29,10 @@ from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.processors.aggregators.llm_response_universal import (
+    LLMContextAggregatorPair,
+    RealtimeServiceModeConfig,
+)
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
 from pipecat.services.llm_service import FunctionCallParams
@@ -129,7 +132,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    )

    context = LLMContext(tools=tools)
-    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context)
+    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
+        context,
+        realtime_service_mode=RealtimeServiceModeConfig(),
+    )

    pipeline = Pipeline(
        [
--- a/examples/realtime/realtime-grok.py
+++ b/examples/realtime/realtime-grok.py
@@ -33,9 +33,6 @@ from loguru import logger

 from pipecat.adapters.schemas.function_schema import FunctionSchema
 from pipecat.adapters.schemas.tools_schema import ToolsSchema
-
-# Note: Grok has built-in server-side VAD, so we don't need local VAD
-# from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.frames.frames import LLMRunFrame
 from pipecat.observers.loggers.transcription_log_observer import (
    TranscriptionLogObserver,
@@ -47,6 +44,7 @@ from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    AssistantTurnStoppedMessage,
    LLMContextAggregatorPair,
+    RealtimeServiceModeConfig,
    UserTurnStoppedMessage,
 )
 from pipecat.runner.types import RunnerArguments
@@ -212,7 +210,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        tools,
    )

-    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context)
+    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
+        context,
+        realtime_service_mode=RealtimeServiceModeConfig(),
+    )

    # Build the pipeline
    # Note: In realtime mode, transcription comes from Grok (upstream),
@@ -248,15 +249,19 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        logger.info("Client disconnected")
        await task.cancel()

-    # Log transcript updates
-    @user_aggregator.event_handler("on_user_turn_stopped")
-    async def on_user_turn_stopped(aggregator, strategy, message: UserTurnStoppedMessage):
+    # Log transcript updates. In realtime mode the turn-stopped events
+    # fire before the message text is finalized (UserTurnStoppedMessage
+    # content is None), so subscribe to the *_message_added events
+    # instead — they fire when the message is written to context and
+    # carry the finalized content.
+    @user_aggregator.event_handler("on_user_message_added")
+    async def on_user_message_added(aggregator, message: UserTurnStoppedMessage):
        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
        line = f"{timestamp}user: {message.content}"
        logger.info(f"Transcript: {line}")

-    @assistant_aggregator.event_handler("on_assistant_turn_stopped")
-    async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage):
+    @assistant_aggregator.event_handler("on_assistant_message_added")
+    async def on_assistant_message_added(aggregator, message: AssistantTurnStoppedMessage):
        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
        line = f"{timestamp}assistant: {message.content}"
        logger.info(f"Transcript: {line}")
--- a/examples/realtime/realtime-inworld.py
+++ b/examples/realtime/realtime-inworld.py
@@ -47,6 +47,7 @@ from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    AssistantTurnStoppedMessage,
    LLMContextAggregatorPair,
+    RealtimeServiceModeConfig,
    UserTurnStoppedMessage,
 )
 from pipecat.runner.types import RunnerArguments
@@ -149,7 +150,10 @@ Always be helpful and proactive in offering assistance.""",
        tools,
    )

-    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context)
+    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
+        context,
+        realtime_service_mode=RealtimeServiceModeConfig(),
+    )

    # Build the pipeline
    pipeline = Pipeline(
@@ -182,13 +186,16 @@ Always be helpful and proactive in offering assistance.""",
        logger.info("Client disconnected")
        await task.cancel()

-    @user_aggregator.event_handler("on_user_turn_stopped")
-    async def on_user_turn_stopped(aggregator, strategy, message: UserTurnStoppedMessage):
+    # In realtime mode the turn-stopped events fire before the message
+    # text is finalized; subscribe to the *_message_added events for the
+    # finalized content.
+    @user_aggregator.event_handler("on_user_message_added")
+    async def on_user_message_added(aggregator, message: UserTurnStoppedMessage):
        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
        logger.info(f"Transcript: {timestamp}user: {message.content}")

-    @assistant_aggregator.event_handler("on_assistant_turn_stopped")
-    async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage):
+    @assistant_aggregator.event_handler("on_assistant_message_added")
+    async def on_assistant_message_added(aggregator, message: AssistantTurnStoppedMessage):
        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
        logger.info(f"Transcript: {timestamp}assistant: {message.content}")

--- a/examples/realtime/realtime-openai-async-tool.py
+++ b/examples/realtime/realtime-openai-async-tool.py
@@ -24,7 +24,6 @@ from loguru import logger

 from pipecat.adapters.schemas.function_schema import FunctionSchema
 from pipecat.adapters.schemas.tools_schema import ToolsSchema
-from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.frames.frames import LLMRunFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
@@ -32,7 +31,7 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    LLMContextAggregatorPair,
-    LLMUserAggregatorParams,
+    RealtimeServiceModeConfig,
 )
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
@@ -147,7 +146,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    context = LLMContext(tools=tools)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
-        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+        realtime_service_mode=RealtimeServiceModeConfig(),
    )

    pipeline = Pipeline(
--- a/examples/realtime/realtime-openai-live-video.py
+++ b/examples/realtime/realtime-openai-live-video.py
@@ -10,7 +10,6 @@ import os
 from dotenv import load_dotenv
 from loguru import logger

-from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.frames.frames import LLMRunFrame
 from pipecat.observers.loggers.transcription_log_observer import TranscriptionLogObserver
 from pipecat.pipeline.pipeline import Pipeline
@@ -19,7 +18,7 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    LLMContextAggregatorPair,
-    LLMUserAggregatorParams,
+    RealtimeServiceModeConfig,
 )
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import (
@@ -106,7 +105,7 @@ Remember, your responses should be short. Just one or two sentences, usually. Re

    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
-        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+        realtime_service_mode=RealtimeServiceModeConfig(),
    )

    pipeline = Pipeline(
--- a/examples/realtime/realtime-openai-local-vad.py
+++ b/examples/realtime/realtime-openai-local-vad.py
@@ -0,0 +1,267 @@
+#
+# Copyright (c) 2024-2026, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+"""OpenAI Realtime with locally-driven turn detection.
+
+By default OpenAI Realtime drives the conversation with its own server-side
+VAD (see `realtime-openai.py`). This variant disables server-side turn
+detection (``turn_detection=False``) and instead drives turn boundaries
+locally with ``SileroVADAnalyzer`` wired into the user aggregator. This is
+the path to take if you want a turn analyzer like ``LocalSmartTurnV3`` to
+decide when the user is done speaking, or if you need ``UserStartedSpeakingFrame``
+/ ``UserStoppedSpeakingFrame`` to fire from the same source as
+``InterruptionFrame``.
+
+Caveat: locally-generated turn boundaries are a heuristic and may not match
+the provider's actual server-side turn decisions. With OpenAI Realtime,
+server-side turn detection is generally what the service expects to drive
+the conversation, and disabling it puts the responsibility on you. Prefer
+server-emitted turn frames (i.e. the base `realtime-openai.py` example)
+unless you have a specific reason to drive turn detection locally.
+"""
+
+import asyncio
+import os
+from datetime import datetime
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.adapters.schemas.function_schema import FunctionSchema
+from pipecat.adapters.schemas.tools_schema import ToolsSchema
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.frames.frames import LLMRunFrame, LLMSetToolsFrame
+from pipecat.observers.loggers.transcription_log_observer import TranscriptionLogObserver
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import (
+    AssistantTurnStoppedMessage,
+    LLMContextAggregatorPair,
+    LLMUserAggregatorParams,
+    RealtimeServiceModeConfig,
+    UserTurnStoppedMessage,
+)
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.llm_service import FunctionCallParams
+from pipecat.services.openai.realtime.events import (
+    AudioConfiguration,
+    AudioInput,
+    InputAudioNoiseReduction,
+    InputAudioTranscription,
+    SessionProperties,
+)
+from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+
+async def fetch_weather_from_api(params: FunctionCallParams):
+    temperature = 75 if params.arguments["format"] == "fahrenheit" else 24
+    await params.result_callback(
+        {
+            "conditions": "nice",
+            "temperature": temperature,
+            "format": params.arguments["format"],
+            "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"),
+        }
+    )
+
+
+async def get_news(params: FunctionCallParams):
+    await params.result_callback(
+        {
+            "news": [
+                "Massive UFO currently hovering above New York City",
+                "Stock markets reach all-time highs",
+                "Living dinosaur species discovered in the Amazon rainforest",
+            ],
+        }
+    )
+
+
+async def fetch_restaurant_recommendation(params: FunctionCallParams):
+    await params.result_callback({"name": "The Golden Dragon"})
+
+
+weather_function = FunctionSchema(
+    name="get_current_weather",
+    description="Get the current weather",
+    properties={
+        "location": {
+            "type": "string",
+            "description": "The city and state, e.g. San Francisco, CA",
+        },
+        "format": {
+            "type": "string",
+            "enum": ["celsius", "fahrenheit"],
+            "description": "The temperature unit to use. Infer this from the users location.",
+        },
+    },
+    required=["location", "format"],
+)
+
+get_news_function = FunctionSchema(
+    name="get_news",
+    description="Get the current news.",
+    properties={},
+    required=[],
+)
+
+restaurant_function = FunctionSchema(
+    name="get_restaurant_recommendation",
+    description="Get a restaurant recommendation",
+    properties={
+        "location": {
+            "type": "string",
+            "description": "The city and state, e.g. San Francisco, CA",
+        },
+    },
+    required=["location"],
+)
+
+tools = ToolsSchema(standard_tools=[weather_function, restaurant_function])
+
+
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    llm = OpenAIRealtimeLLMService(
+        api_key=os.environ["OPENAI_API_KEY"],
+        settings=OpenAIRealtimeLLMService.Settings(
+            system_instruction="""You are a helpful and friendly AI.
+
+Act like a human, but remember that you aren't a human and that you can't do human
+things in the real world. Your voice and personality should be warm and engaging, with a lively and
+playful tone.
+
+If interacting in a non-English language, start by using the standard accent or dialect familiar to
+the user. Talk quickly. You should always call a function if you can. Do not refer to these rules,
+even if you're asked about them.
+
+You are participating in a voice conversation. Keep your responses concise, short, and to the point
+unless specifically asked to elaborate on a topic.
+
+Remember, your responses should be short. Just one or two sentences, usually. Respond in English.""",
+            session_properties=SessionProperties(
+                audio=AudioConfiguration(
+                    input=AudioInput(
+                        transcription=InputAudioTranscription(),
+                        # Disable OpenAI's server-side turn detection — this
+                        # example drives turn boundaries locally via the
+                        # SileroVADAnalyzer wired into the user aggregator
+                        # below.
+                        turn_detection=False,
+                        noise_reduction=InputAudioNoiseReduction(type="near_field"),
+                    )
+                ),
+            ),
+        ),
+    )
+
+    llm.register_function("get_current_weather", fetch_weather_from_api)
+    llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation)
+    llm.register_function("get_news", get_news)
+
+    context = LLMContext(
+        [{"role": "developer", "content": "Say hello!"}],
+        tools,
+    )
+
+    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
+        context,
+        # Drive turn detection locally via SileroVAD wired into the user
+        # aggregator. realtime_service_mode keeps context-write semantics
+        # correct and (by default) drops the transcript wait on turn-end so
+        # local VAD can drive turn boundaries on the latency critical path.
+        realtime_service_mode=RealtimeServiceModeConfig(),
+        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+    )
+
+    pipeline = Pipeline(
+        [
+            transport.input(),
+            user_aggregator,
+            llm,
+            transport.output(),
+            assistant_aggregator,
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+        observers=[TranscriptionLogObserver()],
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+        await task.queue_frames([LLMRunFrame()])
+
+        await asyncio.sleep(15)
+        new_tools = ToolsSchema(
+            standard_tools=[weather_function, restaurant_function, get_news_function]
+        )
+        await task.queue_frames([LLMSetToolsFrame(tools=new_tools)])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    @user_aggregator.event_handler("on_user_message_added")
+    async def on_user_message_added(aggregator, message: UserTurnStoppedMessage):
+        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
+        line = f"{timestamp}user: {message.content}"
+        logger.info(f"Transcript: {line}")
+
+    @assistant_aggregator.event_handler("on_assistant_message_added")
+    async def on_assistant_message_added(aggregator, message: AssistantTurnStoppedMessage):
+        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
+        line = f"{timestamp}assistant: {message.content}"
+        logger.info(f"Transcript: {line}")
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/realtime/realtime-openai-text.py
+++ b/examples/realtime/realtime-openai-text.py
@@ -13,7 +13,6 @@ from loguru import logger

 from pipecat.adapters.schemas.function_schema import FunctionSchema
 from pipecat.adapters.schemas.tools_schema import ToolsSchema
-from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.frames.frames import LLMRunFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
@@ -21,7 +20,7 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    LLMContextAggregatorPair,
-    LLMUserAggregatorParams,
+    RealtimeServiceModeConfig,
 )
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
@@ -177,7 +176,7 @@ Remember, your responses should be short. Just one or two sentences, usually. Re

    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
-        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+        realtime_service_mode=RealtimeServiceModeConfig(),
    )

    pipeline = Pipeline(
--- a/examples/realtime/realtime-openai.py
+++ b/examples/realtime/realtime-openai.py
@@ -14,7 +14,6 @@ from loguru import logger

 from pipecat.adapters.schemas.function_schema import FunctionSchema
 from pipecat.adapters.schemas.tools_schema import ToolsSchema
-from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.frames.frames import LLMRunFrame, LLMSetToolsFrame
 from pipecat.observers.loggers.transcription_log_observer import TranscriptionLogObserver
 from pipecat.pipeline.pipeline import Pipeline
@@ -24,7 +23,7 @@ from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    AssistantTurnStoppedMessage,
    LLMContextAggregatorPair,
-    LLMUserAggregatorParams,
+    RealtimeServiceModeConfig,
    UserTurnStoppedMessage,
 )
 from pipecat.runner.types import RunnerArguments
@@ -187,7 +186,13 @@ Remember, your responses should be short. Just one or two sentences, usually. Re

    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
-        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+        # OpenAI Realtime drives the conversation server-side and emits its
+        # own UserStarted/StoppedSpeakingFrame from server VAD events, so
+        # local VAD on the aggregator is unnecessary. realtime_service_mode
+        # decouples context writes from turn frames and transcript-bound
+        # turn-end. See `realtime-openai-local-vad.py` for the variant
+        # that disables server VAD and drives turn detection locally.
+        realtime_service_mode=RealtimeServiceModeConfig(),
    )

    pipeline = Pipeline(
@@ -251,15 +256,19 @@ Remember, your responses should be short. Just one or two sentences, usually. Re
        logger.info(f"Client disconnected")
        await task.cancel()

-    # Log transcript updates
-    @user_aggregator.event_handler("on_user_turn_stopped")
-    async def on_user_turn_stopped(aggregator, strategy, message: UserTurnStoppedMessage):
+    # Log transcript updates. In realtime mode the turn-stopped events
+    # fire before the message text is finalized (UserTurnStoppedMessage
+    # content is None), so subscribe to the *_message_added events
+    # instead — they fire when the message is written to context and
+    # carry the finalized content.
+    @user_aggregator.event_handler("on_user_message_added")
+    async def on_user_message_added(aggregator, message: UserTurnStoppedMessage):
        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
        line = f"{timestamp}user: {message.content}"
        logger.info(f"Transcript: {line}")

-    @assistant_aggregator.event_handler("on_assistant_turn_stopped")
-    async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage):
+    @assistant_aggregator.event_handler("on_assistant_message_added")
+    async def on_assistant_message_added(aggregator, message: AssistantTurnStoppedMessage):
        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
        line = f"{timestamp}assistant: {message.content}"
        logger.info(f"Transcript: {line}")
--- a/examples/realtime/realtime-ultravox-async-tool.py
+++ b/examples/realtime/realtime-ultravox-async-tool.py
@@ -26,14 +26,13 @@ from loguru import logger

 from pipecat.adapters.schemas.function_schema import FunctionSchema
 from pipecat.adapters.schemas.tools_schema import ToolsSchema
-from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    LLMContextAggregatorPair,
-    LLMUserAggregatorParams,
+    RealtimeServiceModeConfig,
 )
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
@@ -42,8 +41,6 @@ from pipecat.services.ultravox.llm import OneShotInputParams, UltravoxRealtimeLL
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
-from pipecat.turns.user_stop import SpeechTimeoutUserTurnStopStrategy
-from pipecat.turns.user_turn_strategies import UserTurnStrategies

 load_dotenv(override=True)

@@ -134,12 +131,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    context = LLMContext([])
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
-        user_params=LLMUserAggregatorParams(
-            user_turn_strategies=UserTurnStrategies(
-                stop=[SpeechTimeoutUserTurnStopStrategy()],
-            ),
-            vad_analyzer=SileroVADAnalyzer(),
-        ),
+        realtime_service_mode=RealtimeServiceModeConfig(),
    )

    pipeline = Pipeline(
--- a/examples/realtime/realtime-ultravox-text.py
+++ b/examples/realtime/realtime-ultravox-text.py
@@ -12,8 +12,6 @@ from loguru import logger

 from pipecat.adapters.schemas.function_schema import FunctionSchema
 from pipecat.adapters.schemas.tools_schema import ToolsSchema
-from pipecat.audio.vad.silero import SileroVADAnalyzer
-from pipecat.audio.vad.vad_analyzer import VADParams
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -21,7 +19,7 @@ from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    AssistantTurnStoppedMessage,
    LLMContextAggregatorPair,
-    LLMUserAggregatorParams,
+    RealtimeServiceModeConfig,
    UserTurnStoppedMessage,
 )
 from pipecat.runner.types import RunnerArguments
@@ -32,8 +30,6 @@ from pipecat.services.ultravox.llm import OneShotInputParams, UltravoxRealtimeLL
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
-from pipecat.turns.user_stop import SpeechTimeoutUserTurnStopStrategy
-from pipecat.turns.user_turn_strategies import UserTurnStrategies

 # Load environment variables
 load_dotenv(override=True)
@@ -188,17 +184,9 @@ There is also a secret menu that changes daily. If the user asks about it, use t

    context = LLMContext([])

-    # Necessary to complete the function call lifecycle in Pipecat and
-    # to produce user and assistant turn stopped events.
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
-        user_params=LLMUserAggregatorParams(
-            user_turn_strategies=UserTurnStrategies(
-                stop=[SpeechTimeoutUserTurnStopStrategy()],
-            ),
-            # Set the VAD analyzer to emulate timing of the model.
-            vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
-        ),
+        realtime_service_mode=RealtimeServiceModeConfig(),
    )

    # Build the pipeline
@@ -234,14 +222,16 @@ There is also a secret menu that changes daily. If the user asks about it, use t
        logger.info(f"Client disconnected")
        await task.cancel()

-    @user_aggregator.event_handler("on_user_turn_stopped")
-    async def on_user_turn_stopped(aggregator, strategy, message: UserTurnStoppedMessage):
+    # Ultravox doesn't emit user-turn frames; subscribe to the
+    # *_message_added events for the finalized message text.
+    @user_aggregator.event_handler("on_user_message_added")
+    async def on_user_message_added(aggregator, message: UserTurnStoppedMessage):
        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
        line = f"{timestamp}user: {message.content}"
        logger.info(f"Transcript: {line}")

-    @assistant_aggregator.event_handler("on_assistant_turn_stopped")
-    async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage):
+    @assistant_aggregator.event_handler("on_assistant_message_added")
+    async def on_assistant_message_added(aggregator, message: AssistantTurnStoppedMessage):
        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
        line = f"{timestamp}assistant: {message.content}"
        logger.info(f"Transcript: {line}")
--- a/examples/realtime/realtime-ultravox.py
+++ b/examples/realtime/realtime-ultravox.py
@@ -12,7 +12,6 @@ from loguru import logger

 from pipecat.adapters.schemas.function_schema import FunctionSchema
 from pipecat.adapters.schemas.tools_schema import ToolsSchema
-from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -20,7 +19,7 @@ from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    AssistantTurnStoppedMessage,
    LLMContextAggregatorPair,
-    LLMUserAggregatorParams,
+    RealtimeServiceModeConfig,
    UserTurnStoppedMessage,
 )
 from pipecat.runner.types import RunnerArguments
@@ -30,8 +29,6 @@ from pipecat.services.ultravox.llm import OneShotInputParams, UltravoxRealtimeLL
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
 from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
-from pipecat.turns.user_stop import SpeechTimeoutUserTurnStopStrategy
-from pipecat.turns.user_turn_strategies import UserTurnStrategies

 # Load environment variables
 load_dotenv(override=True)
@@ -178,18 +175,23 @@ There is also a secret menu that changes daily. If the user asks about it, use t

    context = LLMContext([])

-    # Necessary to complete the function call lifecycle in Pipecat and
-    # to produce user and assistant turn stopped events.
+    # Ultravox drives the conversation server-side. It does NOT emit
+    # UserStartedSpeakingFrame / UserStoppedSpeakingFrame, so pipeline
+    # processors that depend on those frames — RTVI client speech events,
+    # TurnTrackingObserver, AudioBufferProcessor turn recording,
+    # UserIdleController, user mute strategies, voicemail detector — won't
+    # activate with this default setup. Context aggregation still works
+    # with realtime_service_mode.
+    #
+    # To produce these frames locally, wire a VAD analyzer (e.g.
+    # SileroVADAnalyzer) into LLMUserAggregatorParams. Caveat: locally-
+    # generated turn boundaries are a heuristic and may not match
+    # Ultravox's server-side turn decisions, which is what drives the
+    # conversation; the two can drift apart in subtle ways especially
+    # around interruptions and overlapping speech.
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
-        user_params=LLMUserAggregatorParams(
-            user_turn_strategies=UserTurnStrategies(
-                stop=[SpeechTimeoutUserTurnStopStrategy()],
-            ),
-            # Set the VAD analyzer to create reliable TTFB measurements and
-            # user stop events.
-            vad_analyzer=SileroVADAnalyzer(),
-        ),
+        realtime_service_mode=RealtimeServiceModeConfig(),
    )

    # Build the pipeline
@@ -224,14 +226,18 @@ There is also a secret menu that changes daily. If the user asks about it, use t
        logger.info(f"Client disconnected")
        await task.cancel()

-    @user_aggregator.event_handler("on_user_turn_stopped")
-    async def on_user_turn_stopped(aggregator, strategy, message: UserTurnStoppedMessage):
+    # Ultravox doesn't emit user-turn frames so on_user_turn_stopped
+    # would never fire. The *_message_added events fire when messages are
+    # written to context and carry the finalized content; use those for
+    # transcript logging.
+    @user_aggregator.event_handler("on_user_message_added")
+    async def on_user_message_added(aggregator, message: UserTurnStoppedMessage):
        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
        line = f"{timestamp}user: {message.content}"
        logger.info(f"Transcript: {line}")

-    @assistant_aggregator.event_handler("on_assistant_turn_stopped")
-    async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage):
+    @assistant_aggregator.event_handler("on_assistant_message_added")
+    async def on_assistant_message_added(aggregator, message: AssistantTurnStoppedMessage):
        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
        line = f"{timestamp}assistant: {message.content}"
        logger.info(f"Transcript: {line}")
--- a/examples/update-settings/llm/llm-aws-nova-sonic.py
+++ b/examples/update-settings/llm/llm-aws-nova-sonic.py
@@ -10,7 +10,6 @@ import os
 from dotenv import load_dotenv
 from loguru import logger

-from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
@@ -18,7 +17,7 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    LLMContextAggregatorPair,
-    LLMUserAggregatorParams,
+    RealtimeServiceModeConfig,
 )
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
@@ -60,7 +59,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    context = LLMContext()
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
-        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+        realtime_service_mode=RealtimeServiceModeConfig(),
    )

    pipeline = Pipeline(
--- a/examples/update-settings/llm/llm-azure-realtime.py
+++ b/examples/update-settings/llm/llm-azure-realtime.py
@@ -11,7 +11,6 @@ from dotenv import load_dotenv
 from loguru import logger

 from pipecat.adapters.base_llm_adapter import LLMContextMessage
-from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
@@ -20,7 +19,7 @@ from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    AssistantTurnStoppedMessage,
    LLMContextAggregatorPair,
-    LLMUserAggregatorParams,
+    RealtimeServiceModeConfig,
 )
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
@@ -66,7 +65,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    context = LLMContext(messages)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
-        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+        realtime_service_mode=RealtimeServiceModeConfig(),
    )

    pipeline = Pipeline(
@@ -88,8 +87,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

-    @assistant_aggregator.event_handler("on_assistant_turn_stopped")
-    async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage):
+    @assistant_aggregator.event_handler("on_assistant_message_added")
+    async def on_assistant_message_added(aggregator, message: AssistantTurnStoppedMessage):
        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
        line = f"{timestamp}assistant: {message.content}"
        logger.info(f"Transcript: {line}")
--- a/examples/update-settings/llm/llm-gemini-live-vertex.py
+++ b/examples/update-settings/llm/llm-gemini-live-vertex.py
@@ -10,7 +10,6 @@ import os
 from dotenv import load_dotenv
 from loguru import logger

-from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
@@ -18,7 +17,7 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    LLMContextAggregatorPair,
-    LLMUserAggregatorParams,
+    RealtimeServiceModeConfig,
 )
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
@@ -60,7 +59,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    context = LLMContext()
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
-        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+        realtime_service_mode=RealtimeServiceModeConfig(),
    )

    pipeline = Pipeline(
--- a/examples/update-settings/llm/llm-gemini-live.py
+++ b/examples/update-settings/llm/llm-gemini-live.py
@@ -10,7 +10,6 @@ import os
 from dotenv import load_dotenv
 from loguru import logger

-from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
@@ -18,7 +17,7 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    LLMContextAggregatorPair,
-    LLMUserAggregatorParams,
+    RealtimeServiceModeConfig,
 )
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
@@ -58,7 +57,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    context = LLMContext()
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
-        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+        realtime_service_mode=RealtimeServiceModeConfig(),
    )

    pipeline = Pipeline(
--- a/examples/update-settings/llm/llm-grok-realtime.py
+++ b/examples/update-settings/llm/llm-grok-realtime.py
@@ -11,7 +11,6 @@ from dotenv import load_dotenv
 from loguru import logger

 from pipecat.adapters.base_llm_adapter import LLMContextMessage
-from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
@@ -20,7 +19,7 @@ from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    AssistantTurnStoppedMessage,
    LLMContextAggregatorPair,
-    LLMUserAggregatorParams,
+    RealtimeServiceModeConfig,
 )
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
@@ -63,7 +62,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    context = LLMContext(messages)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
-        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+        realtime_service_mode=RealtimeServiceModeConfig(),
    )

    pipeline = Pipeline(
@@ -85,8 +84,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

-    @assistant_aggregator.event_handler("on_assistant_turn_stopped")
-    async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage):
+    @assistant_aggregator.event_handler("on_assistant_message_added")
+    async def on_assistant_message_added(aggregator, message: AssistantTurnStoppedMessage):
        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
        line = f"{timestamp}assistant: {message.content}"
        logger.info(f"Transcript: {line}")
--- a/examples/update-settings/llm/llm-openai-realtime.py
+++ b/examples/update-settings/llm/llm-openai-realtime.py
@@ -11,7 +11,6 @@ from dotenv import load_dotenv
 from loguru import logger

 from pipecat.adapters.base_llm_adapter import LLMContextMessage
-from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
@@ -20,7 +19,7 @@ from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    AssistantTurnStoppedMessage,
    LLMContextAggregatorPair,
-    LLMUserAggregatorParams,
+    RealtimeServiceModeConfig,
 )
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
@@ -63,7 +62,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    context = LLMContext(messages)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
-        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+        realtime_service_mode=RealtimeServiceModeConfig(),
    )

    pipeline = Pipeline(
@@ -85,8 +84,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

-    @assistant_aggregator.event_handler("on_assistant_turn_stopped")
-    async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage):
+    @assistant_aggregator.event_handler("on_assistant_message_added")
+    async def on_assistant_message_added(aggregator, message: AssistantTurnStoppedMessage):
        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
        line = f"{timestamp}assistant: {message.content}"
        logger.info(f"Transcript: {line}")
--- a/examples/update-settings/llm/llm-ultravox-realtime.py
+++ b/examples/update-settings/llm/llm-ultravox-realtime.py
@@ -13,7 +13,6 @@ from loguru import logger

 from pipecat.adapters.base_llm_adapter import LLMContextMessage
 from pipecat.adapters.schemas.tools_schema import ToolsSchema
-from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
@@ -22,7 +21,7 @@ from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import (
    AssistantTurnStoppedMessage,
    LLMContextAggregatorPair,
-    LLMUserAggregatorParams,
+    RealtimeServiceModeConfig,
 )
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
@@ -74,7 +73,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    context = LLMContext(messages)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
-        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+        realtime_service_mode=RealtimeServiceModeConfig(),
    )

    pipeline = Pipeline(
@@ -96,8 +95,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

-    @assistant_aggregator.event_handler("on_assistant_turn_stopped")
-    async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage):
+    @assistant_aggregator.event_handler("on_assistant_message_added")
+    async def on_assistant_message_added(aggregator, message: AssistantTurnStoppedMessage):
        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
        line = f"{timestamp}assistant: {message.content}"
        logger.info(f"Transcript: {line}")