From 77cc314a08b7403198911377eeba5f7343b5136a Mon Sep 17 00:00:00 2001 From: joycech333 Date: Mon, 4 May 2026 20:25:31 +0000 Subject: [PATCH 1/2] feat: add Inception LLM service with Mercury-2 support Adds InceptionLLMService, an OpenAI-compatible service for Inception's Mercury-2 diffusion-based reasoning model. Supports reasoning_effort (instant/low/medium/high) and realtime mode for reduced TTFT. --- README.md | 2 +- changelog/4423.added.md | 1 + env.example | 3 + .../function-calling-inception.py | 167 ++++++++++++++++++ pyproject.toml | 1 + src/pipecat/services/inception/__init__.py | 0 src/pipecat/services/inception/llm.py | 130 ++++++++++++++ 7 files changed, 303 insertions(+), 1 deletion(-) create mode 100644 changelog/4423.added.md create mode 100644 examples/function-calling/function-calling-inception.py create mode 100644 src/pipecat/services/inception/__init__.py create mode 100644 src/pipecat/services/inception/llm.py diff --git a/README.md b/README.md index e2591a7dc..8850b4722 100644 --- a/README.md +++ b/README.md @@ -92,7 +92,7 @@ Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.yout | Category | Services | | ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/api-reference/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/api-reference/server/services/stt/aws), [Azure](https://docs.pipecat.ai/api-reference/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/api-reference/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/api-reference/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/api-reference/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/api-reference/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/api-reference/server/services/stt/gladia), [Google](https://docs.pipecat.ai/api-reference/server/services/stt/google), [Gradium](https://docs.pipecat.ai/api-reference/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/api-reference/server/services/stt/groq), [Mistral](https://docs.pipecat.ai/api-reference/server/services/stt/mistral), [NVIDIA](https://docs.pipecat.ai/api-reference/server/services/stt/nvidia), [OpenAI (Whisper)](https://docs.pipecat.ai/api-reference/server/services/stt/openai), [Sarvam](https://docs.pipecat.ai/api-reference/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/api-reference/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/api-reference/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/api-reference/server/services/stt/whisper), [xAI](https://docs.pipecat.ai/api-reference/server/services/stt/xai) | -| LLMs | [Anthropic](https://docs.pipecat.ai/api-reference/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/api-reference/server/services/llm/aws), [Azure](https://docs.pipecat.ai/api-reference/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/api-reference/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/api-reference/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/api-reference/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/api-reference/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/api-reference/server/services/llm/grok), [Groq](https://docs.pipecat.ai/api-reference/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/api-reference/server/services/llm/mistral), [Nebius](https://docs.pipecat.ai/api-reference/server/services/llm/nebius), [Novita](https://docs.pipecat.ai/api-reference/server/services/llm/novita), [NVIDIA NIM](https://docs.pipecat.ai/api-reference/server/services/llm/nvidia), [Ollama](https://docs.pipecat.ai/api-reference/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/api-reference/server/services/llm/openai), [OpenAI Responses](https://docs.pipecat.ai/api-reference/server/services/llm/openai-responses), [OpenRouter](https://docs.pipecat.ai/api-reference/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/api-reference/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/api-reference/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/api-reference/server/services/llm/sambanova), [Sarvam](https://docs.pipecat.ai/api-reference/server/services/llm/sarvam), [Together AI](https://docs.pipecat.ai/api-reference/server/services/llm/together) | +| LLMs | [Anthropic](https://docs.pipecat.ai/api-reference/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/api-reference/server/services/llm/aws), [Azure](https://docs.pipecat.ai/api-reference/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/api-reference/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/api-reference/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/api-reference/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/api-reference/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/api-reference/server/services/llm/grok), [Groq](https://docs.pipecat.ai/api-reference/server/services/llm/groq), [Inception](https://docs.pipecat.ai/api-reference/server/services/llm/inception), [Mistral](https://docs.pipecat.ai/api-reference/server/services/llm/mistral), [Nebius](https://docs.pipecat.ai/api-reference/server/services/llm/nebius), [Novita](https://docs.pipecat.ai/api-reference/server/services/llm/novita), [NVIDIA NIM](https://docs.pipecat.ai/api-reference/server/services/llm/nvidia), [Ollama](https://docs.pipecat.ai/api-reference/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/api-reference/server/services/llm/openai), [OpenAI Responses](https://docs.pipecat.ai/api-reference/server/services/llm/openai-responses), [OpenRouter](https://docs.pipecat.ai/api-reference/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/api-reference/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/api-reference/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/api-reference/server/services/llm/sambanova), [Sarvam](https://docs.pipecat.ai/api-reference/server/services/llm/sarvam), [Together AI](https://docs.pipecat.ai/api-reference/server/services/llm/together) | | Text-to-Speech | [Async](https://docs.pipecat.ai/api-reference/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/api-reference/server/services/tts/aws), [Azure](https://docs.pipecat.ai/api-reference/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/api-reference/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/api-reference/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/api-reference/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/api-reference/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/api-reference/server/services/tts/fish), [Google](https://docs.pipecat.ai/api-reference/server/services/tts/google), [Gradium](https://docs.pipecat.ai/api-reference/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/api-reference/server/services/tts/groq), [Hume](https://docs.pipecat.ai/api-reference/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/api-reference/server/services/tts/inworld), [Kokoro](https://docs.pipecat.ai/api-reference/server/services/tts/kokoro), [LMNT](https://docs.pipecat.ai/api-reference/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/api-reference/server/services/tts/minimax), [Mistral](https://docs.pipecat.ai/api-reference/server/services/tts/mistral), [Neuphonic](https://docs.pipecat.ai/api-reference/server/services/tts/neuphonic), [NVIDIA](https://docs.pipecat.ai/api-reference/server/services/tts/nvidia), [OpenAI](https://docs.pipecat.ai/api-reference/server/services/tts/openai), [Piper](https://docs.pipecat.ai/api-reference/server/services/tts/piper), [Resemble](https://docs.pipecat.ai/api-reference/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/api-reference/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/api-reference/server/services/tts/sarvam), [Smallest](https://docs.pipecat.ai/api-reference/server/services/tts/smallest), [Soniox](https://docs.pipecat.ai/api-reference/server/services/tts/soniox), [Speechmatics](https://docs.pipecat.ai/api-reference/server/services/tts/speechmatics), [xAI](https://docs.pipecat.ai/api-reference/server/services/tts/xai), [XTTS](https://docs.pipecat.ai/api-reference/server/services/tts/xtts) | | Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/api-reference/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/api-reference/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/api-reference/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/api-reference/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/api-reference/server/services/s2s/ultravox), | | Transport | [Daily (WebRTC)](https://docs.pipecat.ai/api-reference/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/api-reference/server/services/transport/fastapi-websocket), [LiveKit (WebRTC)](https://docs.pipecat.ai/api-reference/server/services/transport/livekit), [SmallWebRTCTransport](https://docs.pipecat.ai/api-reference/server/services/transport/small-webrtc), [Vonage (WebRTC)](https://docs.pipecat.ai/api-reference/server/services/transport/vonage), [WebSocket Server](https://docs.pipecat.ai/api-reference/server/services/transport/websocket-server), [WhatsApp](https://docs.pipecat.ai/api-reference/server/services/transport/whatsapp), Local | diff --git a/changelog/4423.added.md b/changelog/4423.added.md new file mode 100644 index 000000000..9ab4076c0 --- /dev/null +++ b/changelog/4423.added.md @@ -0,0 +1 @@ +- Added `InceptionLLMService` for Inception's Mercury 2 diffusion reasoning model, with support for `reasoning_effort` and `realtime` settings. diff --git a/env.example b/env.example index 11a7e606e..2f8dc475b 100644 --- a/env.example +++ b/env.example @@ -91,6 +91,9 @@ HEYGEN_LIVE_AVATAR_API_KEY=... HUME_API_KEY=... HUME_VOICE_ID=... +# Inception +INCEPTION_API_KEY=... + # Inworld INWORLD_API_KEY=... diff --git a/examples/function-calling/function-calling-inception.py b/examples/function-calling/function-calling-inception.py new file mode 100644 index 000000000..556cad7e7 --- /dev/null +++ b/examples/function-calling/function-calling-inception.py @@ -0,0 +1,167 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + + +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.adapters.schemas.function_schema import FunctionSchema +from pipecat.adapters.schemas.tools_schema import ToolsSchema +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSSpeakFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.inception.llm import InceptionLLMService +from pipecat.services.llm_service import FunctionCallParams +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + + +async def fetch_weather_from_api(params: FunctionCallParams): + await params.result_callback({"conditions": "nice", "temperature": "75"}) + + +# We use lambdas to defer transport parameter creation until the transport +# type is selected at runtime. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.environ["DEEPGRAM_API_KEY"]) + + tts = CartesiaTTSService( + api_key=os.environ["CARTESIA_API_KEY"], + settings=CartesiaTTSService.Settings( + voice="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ), + ) + + llm = InceptionLLMService( + api_key=os.environ["INCEPTION_API_KEY"], + settings=InceptionLLMService.Settings( + reasoning_effort="medium", + system_instruction="""You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way. + +You have one functions available: + +1. get_current_weather is used to get current weather information. + +Infer whether to use Fahrenheit or Celsius automatically based on the location, unless the user specifies a preference. + +Start by asking me for my location. Then, use 'get_weather_current' to give me a forecast. + + Respond to what the user said in a creative and helpful way.""", + ), + ) + # You can also register a function_name of None to get all functions + # sent to the same callback with an additional function_name parameter. + llm.register_function("get_current_weather", fetch_weather_from_api) + + @llm.event_handler("on_function_calls_started") + async def on_function_calls_started(service, function_calls): + await tts.queue_frame(TTSSpeakFrame("Let me check on that.")) + + weather_function = FunctionSchema( + name="get_current_weather", + description="Get the current weather", + properties={ + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + "format": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + "description": "The temperature unit to use. Infer this from the user's location.", + }, + }, + required=["location", "format"], + ) + tools = ToolsSchema(standard_tools=[weather_function]) + + context = LLMContext(tools=tools) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + # Kick off the conversation. + await task.queue_frames([LLMRunFrame()]) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/pyproject.toml b/pyproject.toml index ee73126b3..d335012d3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,6 +77,7 @@ groq = [ "groq>=0.23.0,<2" ] gstreamer = [ "pygobject~=3.50.0" ] heygen = [ "livekit>=1.0.13,<2", "pipecat-ai[websockets-base]" ] hume = [ "hume>=0.11.2,<1" ] +inception = [] inworld = [ "pipecat-ai[websockets-base]" ] koala = [ "pvkoala~=2.0.3" ] kokoro = [ "kokoro-onnx>=0.5.0,<1", "requests>=2.32.5,<3" ] diff --git a/src/pipecat/services/inception/__init__.py b/src/pipecat/services/inception/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/pipecat/services/inception/llm.py b/src/pipecat/services/inception/llm.py new file mode 100644 index 000000000..72202cb7e --- /dev/null +++ b/src/pipecat/services/inception/llm.py @@ -0,0 +1,130 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""Inception LLM service implementation using OpenAI-compatible interface.""" + +from dataclasses import dataclass, field +from typing import Literal + +from loguru import logger + +from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams +from pipecat.services.openai.base_llm import BaseOpenAILLMService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.settings import NOT_GIVEN as _NOT_GIVEN +from pipecat.services.settings import _NotGiven, is_given + + +@dataclass +class InceptionLLMSettings(BaseOpenAILLMService.Settings): + """Settings for InceptionLLMService. + + Parameters: + reasoning_effort: Controls how much reasoning the model applies. + One of "instant", "low", "medium", or "high". Defaults to "medium". + realtime: When True, reduces time to first diffusion block (TTFT). + """ + + reasoning_effort: Literal["instant", "low", "medium", "high"] | None | _NotGiven = field( + default_factory=lambda: _NOT_GIVEN + ) + realtime: bool | None | _NotGiven = field(default_factory=lambda: _NOT_GIVEN) + + +class InceptionLLMService(OpenAILLMService): + """A service for interacting with Inception's API using the OpenAI-compatible interface. + + This service extends OpenAILLMService to connect to Inception's API endpoint while + maintaining full compatibility with OpenAI's interface and functionality. + Supports Mercury-2, Inception's diffusion-based reasoning model. + """ + + # Inception doesn't support the "developer" message role. + supports_developer_role = False + + Settings = InceptionLLMSettings + _settings: Settings + + def __init__( + self, + *, + api_key: str, + base_url: str = "https://api.inceptionlabs.ai/v1", + model: str | None = None, + settings: Settings | None = None, + **kwargs, + ): + """Initialize the Inception LLM service. + + Args: + api_key: The API key for accessing Inception's API. + base_url: The base URL for Inception API. Defaults to "https://api.inceptionlabs.ai/v1". + model: The model identifier to use. Defaults to "mercury-2". + + .. deprecated:: 0.0.105 + Use ``settings=InceptionLLMService.Settings(model=...)`` instead. + + settings: Runtime-updatable settings. When provided alongside deprecated + parameters, ``settings`` values take precedence. + **kwargs: Additional keyword arguments passed to OpenAILLMService. + """ + default_settings = self.Settings(model="mercury-2", reasoning_effort=None, realtime=None) + + if model is not None: + self._warn_init_param_moved_to_settings("model", "model") + default_settings.model = model + + if settings is not None: + default_settings.apply_update(settings) + + super().__init__(api_key=api_key, base_url=base_url, settings=default_settings, **kwargs) + + def create_client(self, api_key=None, base_url=None, **kwargs): + """Create OpenAI-compatible client for Inception API endpoint. + + Args: + api_key: The API key for authentication. If None, uses instance default. + base_url: The base URL for the API. If None, uses instance default. + **kwargs: Additional keyword arguments for client configuration. + + Returns: + An OpenAI-compatible client configured for Inception's API. + """ + logger.debug(f"Creating Inception client with api {base_url}") + return super().create_client(api_key, base_url, **kwargs) + + def build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict: + """Build parameters for Inception chat completion request. + + Extends the base OpenAI parameters with Inception-specific options + such as reasoning_effort and realtime. + + Args: + params_from_context: Parameters, derived from the LLM context, to + use for the chat completion. Contains messages, tools, and tool + choice. + + Returns: + Dictionary of parameters for the chat completion request. + """ + params = super().build_chat_completion_params(params_from_context) + + if ( + is_given(self._settings.reasoning_effort) + and self._settings.reasoning_effort is not None + ): + params["reasoning_effort"] = self._settings.reasoning_effort + + # realtime is Inception-specific and unknown to the OpenAI SDK, + # so it must be passed via extra_body to avoid validation errors. + extra_body = {} + if is_given(self._settings.realtime) and self._settings.realtime is not None: + extra_body["realtime"] = self._settings.realtime + + if extra_body: + params["extra_body"] = extra_body + + return params From 28f9203401424ca6687e1dc77fb8a9b7043ec66b Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Thu, 21 May 2026 11:22:16 -0400 Subject: [PATCH 2/2] Code review fixes --- .../function-calling-inception.py | 36 ++++++++++++------- scripts/evals/run-release-evals.py | 1 + src/pipecat/services/inception/llm.py | 22 +++++------- uv.lock | 2 +- 4 files changed, 33 insertions(+), 28 deletions(-) diff --git a/examples/function-calling/function-calling-inception.py b/examples/function-calling/function-calling-inception.py index 556cad7e7..416e39a25 100644 --- a/examples/function-calling/function-calling-inception.py +++ b/examples/function-calling/function-calling-inception.py @@ -39,6 +39,10 @@ async def fetch_weather_from_api(params: FunctionCallParams): await params.result_callback({"conditions": "nice", "temperature": "75"}) +async def fetch_restaurant_recommendation(params: FunctionCallParams): + await params.result_callback({"name": "The Golden Dragon"}) + + # We use lambdas to defer transport parameter creation until the transport # type is selected at runtime. transport_params = { @@ -72,23 +76,14 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): llm = InceptionLLMService( api_key=os.environ["INCEPTION_API_KEY"], settings=InceptionLLMService.Settings( - reasoning_effort="medium", - system_instruction="""You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way. - -You have one functions available: - -1. get_current_weather is used to get current weather information. - -Infer whether to use Fahrenheit or Celsius automatically based on the location, unless the user specifies a preference. - -Start by asking me for my location. Then, use 'get_weather_current' to give me a forecast. - - Respond to what the user said in a creative and helpful way.""", + reasoning_effort="instant", + system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.", ), ) # You can also register a function_name of None to get all functions # sent to the same callback with an additional function_name parameter. llm.register_function("get_current_weather", fetch_weather_from_api) + llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation) @llm.event_handler("on_function_calls_started") async def on_function_calls_started(service, function_calls): @@ -110,7 +105,19 @@ Start by asking me for my location. Then, use 'get_weather_current' to give me a }, required=["location", "format"], ) - tools = ToolsSchema(standard_tools=[weather_function]) + + restaurant_function = FunctionSchema( + name="get_restaurant_recommendation", + description="Get a restaurant recommendation", + properties={ + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + }, + required=["location"], + ) + tools = ToolsSchema(standard_tools=[weather_function, restaurant_function]) context = LLMContext(tools=tools) user_aggregator, assistant_aggregator = LLMContextAggregatorPair( @@ -143,6 +150,9 @@ Start by asking me for my location. Then, use 'get_weather_current' to give me a async def on_client_connected(transport, client): logger.info(f"Client connected") # Kick off the conversation. + context.add_message( + {"role": "developer", "content": "Please introduce yourself to the user."} + ) await task.queue_frames([LLMRunFrame()]) @transport.event_handler("on_client_disconnected") diff --git a/scripts/evals/run-release-evals.py b/scripts/evals/run-release-evals.py index c881d4432..96c89feb0 100644 --- a/scripts/evals/run-release-evals.py +++ b/scripts/evals/run-release-evals.py @@ -198,6 +198,7 @@ TESTS_FUNCTION_CALLING = [ ("function-calling/function-calling-sarvam.py", EVAL_WEATHER), ("function-calling/function-calling-novita.py", EVAL_WEATHER), ("function-calling/function-calling-deepseek.py", EVAL_WEATHER), + ("function-calling/function-calling-inception.py", EVAL_WEATHER), # Video ("function-calling/function-calling-anthropic-video.py", EVAL_VISION_CAMERA), ("function-calling/function-calling-aws-video.py", EVAL_VISION_CAMERA), diff --git a/src/pipecat/services/inception/llm.py b/src/pipecat/services/inception/llm.py index 72202cb7e..6a06810fa 100644 --- a/src/pipecat/services/inception/llm.py +++ b/src/pipecat/services/inception/llm.py @@ -24,7 +24,8 @@ class InceptionLLMSettings(BaseOpenAILLMService.Settings): Parameters: reasoning_effort: Controls how much reasoning the model applies. - One of "instant", "low", "medium", or "high". Defaults to "medium". + One of "instant", "low", "medium", or "high". When unset, the + parameter is omitted and Inception's server-side default applies. realtime: When True, reduces time to first diffusion block (TTFT). """ @@ -53,7 +54,6 @@ class InceptionLLMService(OpenAILLMService): *, api_key: str, base_url: str = "https://api.inceptionlabs.ai/v1", - model: str | None = None, settings: Settings | None = None, **kwargs, ): @@ -62,20 +62,14 @@ class InceptionLLMService(OpenAILLMService): Args: api_key: The API key for accessing Inception's API. base_url: The base URL for Inception API. Defaults to "https://api.inceptionlabs.ai/v1". - model: The model identifier to use. Defaults to "mercury-2". - - .. deprecated:: 0.0.105 - Use ``settings=InceptionLLMService.Settings(model=...)`` instead. - - settings: Runtime-updatable settings. When provided alongside deprecated - parameters, ``settings`` values take precedence. + settings: Runtime-updatable settings. **kwargs: Additional keyword arguments passed to OpenAILLMService. """ - default_settings = self.Settings(model="mercury-2", reasoning_effort=None, realtime=None) - - if model is not None: - self._warn_init_param_moved_to_settings("model", "model") - default_settings.model = model + default_settings = self.Settings( + model="mercury-2", + reasoning_effort=None, + realtime=None, + ) if settings is not None: default_settings.apply_update(settings) diff --git a/uv.lock b/uv.lock index de83cc67d..6972e2fae 100644 --- a/uv.lock +++ b/uv.lock @@ -4574,7 +4574,7 @@ requires-dist = [ { name = "wait-for2", marker = "python_full_version < '3.12'", specifier = ">=0.4.1,<1" }, { name = "websockets", marker = "extra == 'websockets-base'", specifier = ">=13.1,<16.0" }, ] -provides-extras = ["aic", "anthropic", "assemblyai", "asyncai", "aws", "aws-nova-sonic", "azure", "cartesia", "camb", "cerebras", "daily", "deepgram", "deepseek", "elevenlabs", "fal", "fireworks", "fish", "gladia", "google", "gradium", "grok", "groq", "gstreamer", "heygen", "hume", "inworld", "koala", "kokoro", "langchain", "lemonslice", "livekit", "lmnt", "local", "local-smart-turn", "mcp", "mem0", "mistral", "mlx-whisper", "moondream", "nebius", "neuphonic", "novita", "nvidia", "openai", "rnnoise", "openrouter", "perplexity", "piper", "qwen", "resembleai", "rime", "runner", "sagemaker", "sambanova", "sarvam", "sentry", "silero", "simli", "smallest", "soniox", "soundfile", "speechmatics", "strands", "tavus", "together", "tracing", "ultravox", "vonage-video-connector", "webrtc", "websocket", "websockets-base", "whisper", "xai"] +provides-extras = ["aic", "anthropic", "assemblyai", "asyncai", "aws", "aws-nova-sonic", "azure", "cartesia", "camb", "cerebras", "daily", "deepgram", "deepseek", "elevenlabs", "fal", "fireworks", "fish", "gladia", "google", "gradium", "grok", "groq", "gstreamer", "heygen", "hume", "inception", "inworld", "koala", "kokoro", "langchain", "lemonslice", "livekit", "lmnt", "local", "local-smart-turn", "mcp", "mem0", "mistral", "mlx-whisper", "moondream", "nebius", "neuphonic", "novita", "nvidia", "openai", "rnnoise", "openrouter", "perplexity", "piper", "qwen", "resembleai", "rime", "runner", "sagemaker", "sambanova", "sarvam", "sentry", "silero", "simli", "smallest", "soniox", "soundfile", "speechmatics", "strands", "tavus", "together", "tracing", "ultravox", "vonage-video-connector", "webrtc", "websocket", "websockets-base", "whisper", "xai"] [package.metadata.requires-dev] dev = [