chore: use low reasoning_effort in Inception function-calling example

feat: default realtime to True in Inception LLM service
2026-05-21 14:55:03 -04:00 · 2026-05-21 14:54:45 -04:00
2 changed files with 3 additions and 2 deletions
--- a/examples/function-calling/function-calling-inception.py
+++ b/examples/function-calling/function-calling-inception.py
@@ -76,7 +76,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    llm = InceptionLLMService(
        api_key=os.environ["INCEPTION_API_KEY"],
        settings=InceptionLLMService.Settings(
-            reasoning_effort="instant",
+            reasoning_effort="low",
            system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
        ),
    )
--- a/src/pipecat/services/inception/llm.py
+++ b/src/pipecat/services/inception/llm.py
@@ -27,6 +27,7 @@ class InceptionLLMSettings(BaseOpenAILLMService.Settings):
            One of "instant", "low", "medium", or "high". When unset, the
            parameter is omitted and Inception's server-side default applies.
        realtime: When True, reduces time to first diffusion block (TTFT).
+            InceptionLLMService defaults this to True.
    """

    reasoning_effort: Literal["instant", "low", "medium", "high"] | None | _NotGiven = field(
@@ -68,7 +69,7 @@ class InceptionLLMService(OpenAILLMService):
        default_settings = self.Settings(
            model="mercury-2",
            reasoning_effort=None,
-            realtime=None,
+            realtime=True,
        )

        if settings is not None:
Author	SHA1	Message	Date
Mark Backman	f8f39a4c6e	chore: use low reasoning_effort in Inception function-calling example	2026-05-21 14:55:03 -04:00
Mark Backman	1c26c635a1	feat: default realtime to True in Inception LLM service	2026-05-21 14:54:45 -04:00