From 77cc314a08b7403198911377eeba5f7343b5136a Mon Sep 17 00:00:00 2001
From: joycech333 <joycech@stanford.edu>
Date: Mon, 4 May 2026 20:25:31 +0000
Subject: [PATCH 1/2] feat: add Inception LLM service with Mercury-2 support

Adds InceptionLLMService, an OpenAI-compatible service for Inception's
Mercury-2 diffusion-based reasoning model. Supports the `reasoning_effort`
setting (instant/low/medium/high) and a `realtime` setting that reduces
the time to first diffusion block (TTFT).
---
 README.md                                     |   2 +-
 changelog/4423.added.md                       |   1 +
 env.example                                   |   3 +
 .../function-calling-inception.py             | 167 ++++++++++++++++++
 pyproject.toml                                |   1 +
 src/pipecat/services/inception/__init__.py    |   0
 src/pipecat/services/inception/llm.py         | 130 ++++++++++++++
 7 files changed, 303 insertions(+), 1 deletion(-)
 create mode 100644 changelog/4423.added.md
 create mode 100644 examples/function-calling/function-calling-inception.py
 create mode 100644 src/pipecat/services/inception/__init__.py
 create mode 100644 src/pipecat/services/inception/llm.py

diff --git a/README.md b/README.md
index e2591a7dc..8850b4722 100644
--- a/README.md
+++ b/README.md
@@ -92,7 +92,7 @@ Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.yout
 | Category            | Services                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
                                                                                                                                                                                                                                             |
 | ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | Speech-to-Text      | [AssemblyAI](https://docs.pipecat.ai/api-reference/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/api-reference/server/services/stt/aws), [Azure](https://docs.pipecat.ai/api-reference/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/api-reference/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/api-reference/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/api-reference/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/api-reference/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/api-reference/server/services/stt/gladia), [Google](https://docs.pipecat.ai/api-reference/server/services/stt/google), [Gradium](https://docs.pipecat.ai/api-reference/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/api-reference/server/services/stt/groq), [Mistral](https://docs.pipecat.ai/api-reference/server/services/stt/mistral), [NVIDIA](https://docs.pipecat.ai/api-reference/server/services/stt/nvidia), [OpenAI (Whisper)](https://docs.pipecat.ai/api-reference/server/services/stt/openai), [Sarvam](https://docs.pipecat.ai/api-reference/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/api-reference/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/api-reference/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/api-reference/server/services/stt/whisper), [xAI](https://docs.pipecat.ai/api-reference/server/services/stt/xai)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        
                                                                                                                                                                                                                                             |
-| LLMs                | [Anthropic](https://docs.pipecat.ai/api-reference/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/api-reference/server/services/llm/aws), [Azure](https://docs.pipecat.ai/api-reference/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/api-reference/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/api-reference/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/api-reference/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/api-reference/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/api-reference/server/services/llm/grok), [Groq](https://docs.pipecat.ai/api-reference/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/api-reference/server/services/llm/mistral), [Nebius](https://docs.pipecat.ai/api-reference/server/services/llm/nebius), [Novita](https://docs.pipecat.ai/api-reference/server/services/llm/novita), [NVIDIA NIM](https://docs.pipecat.ai/api-reference/server/services/llm/nvidia), [Ollama](https://docs.pipecat.ai/api-reference/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/api-reference/server/services/llm/openai), [OpenAI Responses](https://docs.pipecat.ai/api-reference/server/services/llm/openai-responses), [OpenRouter](https://docs.pipecat.ai/api-reference/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/api-reference/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/api-reference/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/api-reference/server/services/llm/sambanova), [Sarvam](https://docs.pipecat.ai/api-reference/server/services/llm/sarvam), [Together AI](https://docs.pipecat.ai/api-reference/server/services/llm/together)                                                                                                                                                                                                                                                       
                                                                                                                                                                                                                                             |
+| LLMs                | [Anthropic](https://docs.pipecat.ai/api-reference/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/api-reference/server/services/llm/aws), [Azure](https://docs.pipecat.ai/api-reference/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/api-reference/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/api-reference/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/api-reference/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/api-reference/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/api-reference/server/services/llm/grok), [Groq](https://docs.pipecat.ai/api-reference/server/services/llm/groq), [Inception](https://docs.pipecat.ai/api-reference/server/services/llm/inception), [Mistral](https://docs.pipecat.ai/api-reference/server/services/llm/mistral), [Nebius](https://docs.pipecat.ai/api-reference/server/services/llm/nebius), [Novita](https://docs.pipecat.ai/api-reference/server/services/llm/novita), [NVIDIA NIM](https://docs.pipecat.ai/api-reference/server/services/llm/nvidia), [Ollama](https://docs.pipecat.ai/api-reference/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/api-reference/server/services/llm/openai), [OpenAI Responses](https://docs.pipecat.ai/api-reference/server/services/llm/openai-responses), [OpenRouter](https://docs.pipecat.ai/api-reference/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/api-reference/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/api-reference/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/api-reference/server/services/llm/sambanova), [Sarvam](https://docs.pipecat.ai/api-reference/server/services/llm/sarvam), [Together AI](https://docs.pipecat.ai/api-reference/server/services/llm/together)                                                                                                                                                                     
                                                                                                                                                                                                                                                                                                                               |
 | Text-to-Speech      | [Async](https://docs.pipecat.ai/api-reference/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/api-reference/server/services/tts/aws), [Azure](https://docs.pipecat.ai/api-reference/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/api-reference/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/api-reference/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/api-reference/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/api-reference/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/api-reference/server/services/tts/fish), [Google](https://docs.pipecat.ai/api-reference/server/services/tts/google), [Gradium](https://docs.pipecat.ai/api-reference/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/api-reference/server/services/tts/groq), [Hume](https://docs.pipecat.ai/api-reference/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/api-reference/server/services/tts/inworld), [Kokoro](https://docs.pipecat.ai/api-reference/server/services/tts/kokoro), [LMNT](https://docs.pipecat.ai/api-reference/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/api-reference/server/services/tts/minimax), [Mistral](https://docs.pipecat.ai/api-reference/server/services/tts/mistral), [Neuphonic](https://docs.pipecat.ai/api-reference/server/services/tts/neuphonic), [NVIDIA](https://docs.pipecat.ai/api-reference/server/services/tts/nvidia), [OpenAI](https://docs.pipecat.ai/api-reference/server/services/tts/openai), [Piper](https://docs.pipecat.ai/api-reference/server/services/tts/piper), [Resemble](https://docs.pipecat.ai/api-reference/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/api-reference/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/api-reference/server/services/tts/sarvam), [Smallest](https://docs.pipecat.ai/api-reference/server/services/tts/smallest), 
[Soniox](https://docs.pipecat.ai/api-reference/server/services/tts/soniox), [Speechmatics](https://docs.pipecat.ai/api-reference/server/services/tts/speechmatics), [xAI](https://docs.pipecat.ai/api-reference/server/services/tts/xai), [XTTS](https://docs.pipecat.ai/api-reference/server/services/tts/xtts) |
 | Speech-to-Speech    | [AWS Nova Sonic](https://docs.pipecat.ai/api-reference/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/api-reference/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/api-reference/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/api-reference/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/api-reference/server/services/s2s/ultravox),                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
                                                                                                                                                                                                                                             |
 | Transport           | [Daily (WebRTC)](https://docs.pipecat.ai/api-reference/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/api-reference/server/services/transport/fastapi-websocket), [LiveKit (WebRTC)](https://docs.pipecat.ai/api-reference/server/services/transport/livekit), [SmallWebRTCTransport](https://docs.pipecat.ai/api-reference/server/services/transport/small-webrtc), [Vonage (WebRTC)](https://docs.pipecat.ai/api-reference/server/services/transport/vonage), [WebSocket Server](https://docs.pipecat.ai/api-reference/server/services/transport/websocket-server), [WhatsApp](https://docs.pipecat.ai/api-reference/server/services/transport/whatsapp), Local                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       
                                                                                                                                                                                                                                                                                                                                        |
diff --git a/changelog/4423.added.md b/changelog/4423.added.md
new file mode 100644
index 000000000..9ab4076c0
--- /dev/null
+++ b/changelog/4423.added.md
@@ -0,0 +1 @@
+- Added `InceptionLLMService` for Inception's Mercury 2 diffusion reasoning model, with support for `reasoning_effort` and `realtime` settings.
diff --git a/env.example b/env.example
index 11a7e606e..2f8dc475b 100644
--- a/env.example
+++ b/env.example
@@ -91,6 +91,9 @@ HEYGEN_LIVE_AVATAR_API_KEY=...
 HUME_API_KEY=...
 HUME_VOICE_ID=...
 
+# Inception
+INCEPTION_API_KEY=...
+
 # Inworld
 INWORLD_API_KEY=...
 
diff --git a/examples/function-calling/function-calling-inception.py b/examples/function-calling/function-calling-inception.py
new file mode 100644
index 000000000..556cad7e7
--- /dev/null
+++ b/examples/function-calling/function-calling-inception.py
@@ -0,0 +1,167 @@
+#
+# Copyright (c) 2024-2026, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.adapters.schemas.function_schema import FunctionSchema
+from pipecat.adapters.schemas.tools_schema import ToolsSchema
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.frames.frames import LLMRunFrame, TTSSpeakFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import (
+    LLMContextAggregatorPair,
+    LLMUserAggregatorParams,
+)
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.cartesia.tts import CartesiaTTSService
+from pipecat.services.deepgram.stt import DeepgramSTTService
+from pipecat.services.inception.llm import InceptionLLMService
+from pipecat.services.llm_service import FunctionCallParams
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+
+async def fetch_weather_from_api(params: FunctionCallParams):
+    await params.result_callback({"conditions": "nice", "temperature": "75"})
+
+
+# We use lambdas to defer transport parameter creation until the transport
+# type is selected at runtime.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    stt = DeepgramSTTService(api_key=os.environ["DEEPGRAM_API_KEY"])
+
+    tts = CartesiaTTSService(
+        api_key=os.environ["CARTESIA_API_KEY"],
+        settings=CartesiaTTSService.Settings(
+            voice="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
+        ),
+    )
+
+    llm = InceptionLLMService(
+        api_key=os.environ["INCEPTION_API_KEY"],
+        settings=InceptionLLMService.Settings(
+            reasoning_effort="medium",
+            system_instruction="""You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.
+
+You have one functions available:
+
+1. get_current_weather is used to get current weather information.
+
+Infer whether to use Fahrenheit or Celsius automatically based on the location, unless the user specifies a preference.
+
+Start by asking me for my location. Then, use 'get_weather_current' to give me a forecast.
+
+    Respond to what the user said in a creative and helpful way.""",
+        ),
+    )
+    # You can also register a function_name of None to get all functions
+    # sent to the same callback with an additional function_name parameter.
+    llm.register_function("get_current_weather", fetch_weather_from_api)
+
+    @llm.event_handler("on_function_calls_started")
+    async def on_function_calls_started(service, function_calls):
+        await tts.queue_frame(TTSSpeakFrame("Let me check on that."))
+
+    weather_function = FunctionSchema(
+        name="get_current_weather",
+        description="Get the current weather",
+        properties={
+            "location": {
+                "type": "string",
+                "description": "The city and state, e.g. San Francisco, CA",
+            },
+            "format": {
+                "type": "string",
+                "enum": ["celsius", "fahrenheit"],
+                "description": "The temperature unit to use. Infer this from the user's location.",
+            },
+        },
+        required=["location", "format"],
+    )
+    tools = ToolsSchema(standard_tools=[weather_function])
+
+    context = LLMContext(tools=tools)
+    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
+        context,
+        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+    )
+
+    pipeline = Pipeline(
+        [
+            transport.input(),
+            stt,
+            user_aggregator,
+            llm,
+            tts,
+            transport.output(),
+            assistant_aggregator,
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+        # Kick off the conversation.
+        await task.queue_frames([LLMRunFrame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
diff --git a/pyproject.toml b/pyproject.toml
index ee73126b3..d335012d3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -77,6 +77,7 @@ groq = [ "groq>=0.23.0,<2" ]
 gstreamer = [ "pygobject~=3.50.0" ]
 heygen = [ "livekit>=1.0.13,<2", "pipecat-ai[websockets-base]" ]
 hume = [ "hume>=0.11.2,<1" ]
+inception = []
 inworld = [ "pipecat-ai[websockets-base]" ]
 koala = [ "pvkoala~=2.0.3" ]
 kokoro = [ "kokoro-onnx>=0.5.0,<1", "requests>=2.32.5,<3" ]
diff --git a/src/pipecat/services/inception/__init__.py b/src/pipecat/services/inception/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/pipecat/services/inception/llm.py b/src/pipecat/services/inception/llm.py
new file mode 100644
index 000000000..72202cb7e
--- /dev/null
+++ b/src/pipecat/services/inception/llm.py
@@ -0,0 +1,130 @@
+#
+# Copyright (c) 2024-2026, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+"""Inception LLM service implementation using OpenAI-compatible interface."""
+
+from dataclasses import dataclass, field
+from typing import Literal
+
+from loguru import logger
+
+from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
+from pipecat.services.openai.base_llm import BaseOpenAILLMService
+from pipecat.services.openai.llm import OpenAILLMService
+from pipecat.services.settings import NOT_GIVEN as _NOT_GIVEN
+from pipecat.services.settings import _NotGiven, is_given
+
+
+@dataclass
+class InceptionLLMSettings(BaseOpenAILLMService.Settings):
+    """Settings for InceptionLLMService.
+
+    Parameters:
+        reasoning_effort: Controls how much reasoning the model applies.
+            One of "instant", "low", "medium", or "high". Defaults to "medium".
+        realtime: When True, reduces time to first diffusion block (TTFT).
+    """
+
+    reasoning_effort: Literal["instant", "low", "medium", "high"] | None | _NotGiven = field(
+        default_factory=lambda: _NOT_GIVEN
+    )
+    realtime: bool | None | _NotGiven = field(default_factory=lambda: _NOT_GIVEN)
+
+
+class InceptionLLMService(OpenAILLMService):
+    """A service for interacting with Inception's API using the OpenAI-compatible interface.
+
+    This service extends OpenAILLMService to connect to Inception's API endpoint while
+    maintaining full compatibility with OpenAI's interface and functionality.
+    Supports Mercury-2, Inception's diffusion-based reasoning model.
+    """
+
+    # Inception doesn't support the "developer" message role.
+    supports_developer_role = False
+
+    Settings = InceptionLLMSettings
+    _settings: Settings
+
+    def __init__(
+        self,
+        *,
+        api_key: str,
+        base_url: str = "https://api.inceptionlabs.ai/v1",
+        model: str | None = None,
+        settings: Settings | None = None,
+        **kwargs,
+    ):
+        """Initialize the Inception LLM service.
+
+        Args:
+            api_key: The API key for accessing Inception's API.
+            base_url: The base URL for Inception API. Defaults to "https://api.inceptionlabs.ai/v1".
+            model: The model identifier to use. Defaults to "mercury-2".
+
+                .. deprecated:: 0.0.105
+                    Use ``settings=InceptionLLMService.Settings(model=...)`` instead.
+
+            settings: Runtime-updatable settings. When provided alongside deprecated
+                parameters, ``settings`` values take precedence.
+            **kwargs: Additional keyword arguments passed to OpenAILLMService.
+        """
+        default_settings = self.Settings(model="mercury-2", reasoning_effort=None, realtime=None)
+
+        if model is not None:
+            self._warn_init_param_moved_to_settings("model", "model")
+            default_settings.model = model
+
+        if settings is not None:
+            default_settings.apply_update(settings)
+
+        super().__init__(api_key=api_key, base_url=base_url, settings=default_settings, **kwargs)
+
+    def create_client(self, api_key=None, base_url=None, **kwargs):
+        """Create OpenAI-compatible client for Inception API endpoint.
+
+        Args:
+            api_key: The API key for authentication. If None, uses instance default.
+            base_url: The base URL for the API. If None, uses instance default.
+            **kwargs: Additional keyword arguments for client configuration.
+
+        Returns:
+            An OpenAI-compatible client configured for Inception's API.
+        """
+        logger.debug(f"Creating Inception client with api {base_url}")
+        return super().create_client(api_key, base_url, **kwargs)
+
+    def build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
+        """Build parameters for Inception chat completion request.
+
+        Extends the base OpenAI parameters with Inception-specific options
+        such as reasoning_effort and realtime.
+
+        Args:
+            params_from_context: Parameters, derived from the LLM context, to
+                use for the chat completion. Contains messages, tools, and tool
+                choice.
+
+        Returns:
+            Dictionary of parameters for the chat completion request.
+        """
+        params = super().build_chat_completion_params(params_from_context)
+
+        if (
+            is_given(self._settings.reasoning_effort)
+            and self._settings.reasoning_effort is not None
+        ):
+            params["reasoning_effort"] = self._settings.reasoning_effort
+
+        # realtime is Inception-specific and unknown to the OpenAI SDK,
+        # so it must be passed via extra_body to avoid validation errors.
+        extra_body = {}
+        if is_given(self._settings.realtime) and self._settings.realtime is not None:
+            extra_body["realtime"] = self._settings.realtime
+
+        if extra_body:
+            params["extra_body"] = extra_body
+
+        return params

From 28f9203401424ca6687e1dc77fb8a9b7043ec66b Mon Sep 17 00:00:00 2001
From: Mark Backman <mark@daily.co>
Date: Thu, 21 May 2026 11:22:16 -0400
Subject: [PATCH 2/2] Code review fixes

---
 .../function-calling-inception.py             | 36 ++++++++++++-------
 scripts/evals/run-release-evals.py            |  1 +
 src/pipecat/services/inception/llm.py         | 22 +++++-------
 uv.lock                                       |  2 +-
 4 files changed, 33 insertions(+), 28 deletions(-)

diff --git a/examples/function-calling/function-calling-inception.py b/examples/function-calling/function-calling-inception.py
index 556cad7e7..416e39a25 100644
--- a/examples/function-calling/function-calling-inception.py
+++ b/examples/function-calling/function-calling-inception.py
@@ -39,6 +39,10 @@ async def fetch_weather_from_api(params: FunctionCallParams):
     await params.result_callback({"conditions": "nice", "temperature": "75"})
 
 
+async def fetch_restaurant_recommendation(params: FunctionCallParams):
+    await params.result_callback({"name": "The Golden Dragon"})  # Hard-coded demo result.
+
+
 # We use lambdas to defer transport parameter creation until the transport
 # type is selected at runtime.
 transport_params = {
@@ -72,23 +76,14 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
     llm = InceptionLLMService(
         api_key=os.environ["INCEPTION_API_KEY"],
         settings=InceptionLLMService.Settings(
-            reasoning_effort="medium",
-            system_instruction="""You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.
-
-You have one functions available:
-
-1. get_current_weather is used to get current weather information.
-
-Infer whether to use Fahrenheit or Celsius automatically based on the location, unless the user specifies a preference.
-
-Start by asking me for my location. Then, use 'get_weather_current' to give me a forecast.
-
-    Respond to what the user said in a creative and helpful way.""",
+            reasoning_effort="instant",
+            system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
         ),
     )
     # You can also register a function_name of None to get all functions
     # sent to the same callback with an additional function_name parameter.
     llm.register_function("get_current_weather", fetch_weather_from_api)
+    llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation)
 
     @llm.event_handler("on_function_calls_started")
     async def on_function_calls_started(service, function_calls):
@@ -110,7 +105,19 @@ Start by asking me for my location. Then, use 'get_weather_current' to give me a
         },
         required=["location", "format"],
     )
-    tools = ToolsSchema(standard_tools=[weather_function])
+
+    restaurant_function = FunctionSchema(
+        name="get_restaurant_recommendation",
+        description="Get a restaurant recommendation",
+        properties={
+            "location": {
+                "type": "string",
+                "description": "The city and state, e.g. San Francisco, CA",
+            },
+        },
+        required=["location"],
+    )
+    tools = ToolsSchema(standard_tools=[weather_function, restaurant_function])
 
     context = LLMContext(tools=tools)
     user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
@@ -143,6 +150,9 @@ Start by asking me for my location. Then, use 'get_weather_current' to give me a
     async def on_client_connected(transport, client):
         logger.info(f"Client connected")
         # Kick off the conversation.
+        context.add_message(
+            {"role": "developer", "content": "Please introduce yourself to the user."}
+        )
         await task.queue_frames([LLMRunFrame()])
 
     @transport.event_handler("on_client_disconnected")
diff --git a/scripts/evals/run-release-evals.py b/scripts/evals/run-release-evals.py
index c881d4432..96c89feb0 100644
--- a/scripts/evals/run-release-evals.py
+++ b/scripts/evals/run-release-evals.py
@@ -198,6 +198,7 @@ TESTS_FUNCTION_CALLING = [
     ("function-calling/function-calling-sarvam.py", EVAL_WEATHER),
     ("function-calling/function-calling-novita.py", EVAL_WEATHER),
     ("function-calling/function-calling-deepseek.py", EVAL_WEATHER),
+    ("function-calling/function-calling-inception.py", EVAL_WEATHER),
     # Video
     ("function-calling/function-calling-anthropic-video.py", EVAL_VISION_CAMERA),
     ("function-calling/function-calling-aws-video.py", EVAL_VISION_CAMERA),
diff --git a/src/pipecat/services/inception/llm.py b/src/pipecat/services/inception/llm.py
index 72202cb7e..6a06810fa 100644
--- a/src/pipecat/services/inception/llm.py
+++ b/src/pipecat/services/inception/llm.py
@@ -24,7 +24,8 @@ class InceptionLLMSettings(BaseOpenAILLMService.Settings):
 
     Parameters:
         reasoning_effort: Controls how much reasoning the model applies.
-            One of "instant", "low", "medium", or "high". Defaults to "medium".
+            One of "instant", "low", "medium", or "high". When unset, the
+            parameter is omitted and Inception's server-side default applies.
         realtime: When True, reduces time to first diffusion block (TTFT).
     """
 
@@ -53,7 +54,6 @@ class InceptionLLMService(OpenAILLMService):
         *,
         api_key: str,
         base_url: str = "https://api.inceptionlabs.ai/v1",
-        model: str | None = None,
         settings: Settings | None = None,
         **kwargs,
     ):
@@ -62,20 +62,14 @@ class InceptionLLMService(OpenAILLMService):
         Args:
             api_key: The API key for accessing Inception's API.
             base_url: The base URL for Inception API. Defaults to "https://api.inceptionlabs.ai/v1".
-            model: The model identifier to use. Defaults to "mercury-2".
-
-                .. deprecated:: 0.0.105
-                    Use ``settings=InceptionLLMService.Settings(model=...)`` instead.
-
-            settings: Runtime-updatable settings. When provided alongside deprecated
-                parameters, ``settings`` values take precedence.
+            settings: Runtime-updatable settings.
             **kwargs: Additional keyword arguments passed to OpenAILLMService.
         """
-        default_settings = self.Settings(model="mercury-2", reasoning_effort=None, realtime=None)
-
-        if model is not None:
-            self._warn_init_param_moved_to_settings("model", "model")
-            default_settings.model = model
+        default_settings = self.Settings(
+            model="mercury-2",
+            reasoning_effort=None,
+            realtime=None,
+        )
 
         if settings is not None:
             default_settings.apply_update(settings)
diff --git a/uv.lock b/uv.lock
index de83cc67d..6972e2fae 100644
--- a/uv.lock
+++ b/uv.lock
@@ -4574,7 +4574,7 @@ requires-dist = [
     { name = "wait-for2", marker = "python_full_version < '3.12'", specifier = ">=0.4.1,<1" },
     { name = "websockets", marker = "extra == 'websockets-base'", specifier = ">=13.1,<16.0" },
 ]
-provides-extras = ["aic", "anthropic", "assemblyai", "asyncai", "aws", "aws-nova-sonic", "azure", "cartesia", "camb", "cerebras", "daily", "deepgram", "deepseek", "elevenlabs", "fal", "fireworks", "fish", "gladia", "google", "gradium", "grok", "groq", "gstreamer", "heygen", "hume", "inworld", "koala", "kokoro", "langchain", "lemonslice", "livekit", "lmnt", "local", "local-smart-turn", "mcp", "mem0", "mistral", "mlx-whisper", "moondream", "nebius", "neuphonic", "novita", "nvidia", "openai", "rnnoise", "openrouter", "perplexity", "piper", "qwen", "resembleai", "rime", "runner", "sagemaker", "sambanova", "sarvam", "sentry", "silero", "simli", "smallest", "soniox", "soundfile", "speechmatics", "strands", "tavus", "together", "tracing", "ultravox", "vonage-video-connector", "webrtc", "websocket", "websockets-base", "whisper", "xai"]
+provides-extras = ["aic", "anthropic", "assemblyai", "asyncai", "aws", "aws-nova-sonic", "azure", "cartesia", "camb", "cerebras", "daily", "deepgram", "deepseek", "elevenlabs", "fal", "fireworks", "fish", "gladia", "google", "gradium", "grok", "groq", "gstreamer", "heygen", "hume", "inception", "inworld", "koala", "kokoro", "langchain", "lemonslice", "livekit", "lmnt", "local", "local-smart-turn", "mcp", "mem0", "mistral", "mlx-whisper", "moondream", "nebius", "neuphonic", "novita", "nvidia", "openai", "rnnoise", "openrouter", "perplexity", "piper", "qwen", "resembleai", "rime", "runner", "sagemaker", "sambanova", "sarvam", "sentry", "silero", "simli", "smallest", "soniox", "soundfile", "speechmatics", "strands", "tavus", "together", "tracing", "ultravox", "vonage-video-connector", "webrtc", "websocket", "websockets-base", "whisper", "xai"]
 
 [package.metadata.requires-dev]
 dev = [