Compare commits

...

2 Commits

Author SHA1 Message Date
Mark Backman
f8f39a4c6e chore: use low reasoning_effort in Inception function-calling example 2026-05-21 14:55:03 -04:00
Mark Backman
1c26c635a1 feat: default realtime to True in Inception LLM service 2026-05-21 14:54:45 -04:00
2 changed files with 3 additions and 2 deletions

View File

@@ -76,7 +76,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
llm = InceptionLLMService(
api_key=os.environ["INCEPTION_API_KEY"],
settings=InceptionLLMService.Settings(
-reasoning_effort="instant",
+reasoning_effort="low",
system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
),
)

View File

@@ -27,6 +27,7 @@ class InceptionLLMSettings(BaseOpenAILLMService.Settings):
One of "instant", "low", "medium", or "high". When unset, the
parameter is omitted and Inception's server-side default applies.
realtime: When True, reduces time to first diffusion block (TTFT).
+Defaults to True.
"""
reasoning_effort: Literal["instant", "low", "medium", "high"] | None | _NotGiven = field(
@@ -68,7 +69,7 @@ class InceptionLLMService(OpenAILLMService):
default_settings = self.Settings(
model="mercury-2",
reasoning_effort=None,
-realtime=None,
+realtime=True,
)
if settings is not None: