diff --git a/env.example b/env.example index 3723fbf26..183fd23ba 100644 --- a/env.example +++ b/env.example @@ -121,6 +121,9 @@ MINIMAX_GROUP_ID=... # Mistral MISTRAL_API_KEY=... +# Nebius +NEBIUS_API_KEY=... + # Neuphonic NEUPHONIC_API_KEY=... diff --git a/examples/foundational/07z-interruptible-sarvam-http.py b/examples/foundational/07z-interruptible-sarvam-http.py index 09938cf96..bca27e051 100644 --- a/examples/foundational/07z-interruptible-sarvam-http.py +++ b/examples/foundational/07z-interruptible-sarvam-http.py @@ -111,7 +111,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info(f"Client connected") # Kick off the conversation. context.add_message( - {"role": "user", "content": "Please introduce yourself to the user."} + {"role": "developer", "content": "Please introduce yourself to the user."} ) await task.queue_frames([LLMRunFrame()]) diff --git a/examples/foundational/07z-interruptible-sarvam.py b/examples/foundational/07z-interruptible-sarvam.py index 031b82116..bd007d89e 100644 --- a/examples/foundational/07z-interruptible-sarvam.py +++ b/examples/foundational/07z-interruptible-sarvam.py @@ -104,7 +104,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): async def on_client_connected(transport, client): logger.info(f"Client connected") # Kick off the conversation. - context.add_message({"role": "user", "content": "Please introduce yourself to the user."}) + context.add_message( + {"role": "developer", "content": "Please introduce yourself to the user."} + ) await task.queue_frames([LLMRunFrame()]) # Optionally, you can wait for 30 seconds and then change the voice. diff --git a/examples/foundational/14-function-calling.py b/examples/foundational/14-function-calling.py index 5001d5dad..085937da9 100644 --- a/examples/foundational/14-function-calling.py +++ b/examples/foundational/14-function-calling.py @@ -148,6 +148,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): async def on_client_connected(transport, client): logger.info(f"Client connected") # Kick off the conversation. + context.add_message( + {"role": "developer", "content": "Please introduce yourself to the user."} + ) await task.queue_frames([LLMRunFrame()]) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/14v-function-calling-openai.py b/examples/foundational/14b-function-calling-openai.py similarity index 100% rename from examples/foundational/14v-function-calling-openai.py rename to examples/foundational/14b-function-calling-openai.py diff --git a/examples/foundational/14c-function-calling-together.py b/examples/foundational/14c-function-calling-together.py index 927c30bbd..78da7fbe9 100644 --- a/examples/foundational/14c-function-calling-together.py +++ b/examples/foundational/14c-function-calling-together.py @@ -131,6 +131,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): async def on_client_connected(transport, client): logger.info(f"Client connected") # Kick off the conversation. + context.add_message( + {"role": "developer", "content": "Please introduce yourself to the user."} + ) await task.queue_frames([LLMRunFrame()]) @transport.event_handler("on_client_disconnected") diff --git a/examples/foundational/14v-function-calling-nebius.py b/examples/foundational/14v-function-calling-nebius.py new file mode 100644 index 000000000..4fe9a378a --- /dev/null +++ b/examples/foundational/14v-function-calling-nebius.py @@ -0,0 +1,175 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.adapters.schemas.function_schema import FunctionSchema +from pipecat.adapters.schemas.tools_schema import ToolsSchema +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSSpeakFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.llm_service import FunctionCallParams +from pipecat.services.nebius.llm import NebiusLLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + + +async def fetch_weather_from_api(params: FunctionCallParams): + await params.result_callback({"conditions": "nice", "temperature": "75"}) + + +async def fetch_restaurant_recommendation(params: FunctionCallParams): + await params.result_callback({"name": "The Golden Dragon"}) + + +# We use lambdas to defer transport parameter creation until the transport +# type is selected at runtime. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + settings=CartesiaTTSService.Settings( + voice="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ), + ) + + llm = NebiusLLMService( + api_key=os.getenv("NEBIUS_API_KEY"), + settings=NebiusLLMService.Settings( + system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.", + ), + ) + + # You can also register a function_name of None to get all functions + # sent to the same callback with an additional function_name parameter. + llm.register_function("get_current_weather", fetch_weather_from_api) + llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation) + + @llm.event_handler("on_function_calls_started") + async def on_function_calls_started(service, function_calls): + await tts.queue_frame(TTSSpeakFrame("Let me check on that.")) + + weather_function = FunctionSchema( + name="get_current_weather", + description="Get the current weather", + properties={ + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + "format": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + "description": "The temperature unit to use. Infer this from the user's location.", + }, + }, + required=["location", "format"], + ) + restaurant_function = FunctionSchema( + name="get_restaurant_recommendation", + description="Get a restaurant recommendation", + properties={ + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + }, + required=["location"], + ) + tools = ToolsSchema(standard_tools=[weather_function, restaurant_function]) + + context = LLMContext(tools=tools) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + # Kick off the conversation. + context.add_message( + {"role": "developer", "content": "Please introduce yourself to the user."} + ) + await task.queue_frames([LLMRunFrame()]) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/14y-function-calling-sarvam.py b/examples/foundational/14y-function-calling-sarvam.py index fece01a3a..bd3fa42fb 100644 --- a/examples/foundational/14y-function-calling-sarvam.py +++ b/examples/foundational/14y-function-calling-sarvam.py @@ -153,7 +153,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): async def on_client_connected(transport, client): logger.info(f"Client connected") # Kick off the conversation. - context.add_message({"role": "user", "content": "Please introduce yourself to the user."}) + context.add_message( + {"role": "developer", "content": "Please introduce yourself to the user."} + ) await task.queue_frames([LLMRunFrame()]) @transport.event_handler("on_client_disconnected") diff --git a/scripts/evals/run-release-evals.py b/scripts/evals/run-release-evals.py index 9632b5eae..c51409b7c 100644 --- a/scripts/evals/run-release-evals.py +++ b/scripts/evals/run-release-evals.py @@ -169,8 +169,11 @@ TESTS_12 = [ TESTS_14 = [ ("14-function-calling.py", EVAL_WEATHER), ("14-function-calling.py", EVAL_WEATHER_AND_RESTAURANT), + ("14-function-calling-openai-responses.py", EVAL_WEATHER), + ("14-function-calling-openai-responses.py", EVAL_WEATHER_AND_RESTAURANT), ("14a-function-calling-anthropic.py", EVAL_WEATHER), ("14a-function-calling-anthropic.py", EVAL_WEATHER_AND_RESTAURANT), + ("14b-function-calling-openai.py", EVAL_WEATHER), ("14e-function-calling-google.py", EVAL_WEATHER), ("14e-function-calling-google.py", EVAL_WEATHER_AND_RESTAURANT), ("14f-function-calling-groq.py", EVAL_WEATHER), @@ -186,13 +189,11 @@ TESTS_14 = [ ("14r-function-calling-aws.py", EVAL_WEATHER), ("14s-function-calling-sambanova.py", EVAL_WEATHER), ("14r-function-calling-aws.py", EVAL_WEATHER_AND_RESTAURANT), - ("14v-function-calling-openai.py", EVAL_WEATHER), + ("14v-function-calling-nebius.py", EVAL_WEATHER), ("14w-function-calling-mistral.py", EVAL_WEATHER), ("14x-function-calling-openpipe.py", EVAL_WEATHER), ("14y-function-calling-sarvam.py", EVAL_WEATHER), ("14z-function-calling-novita.py", EVAL_WEATHER), - ("14-function-calling-openai-responses.py", EVAL_WEATHER), - ("14-function-calling-openai-responses.py", EVAL_WEATHER_AND_RESTAURANT), # Video ("14d-function-calling-anthropic-video.py", EVAL_VISION_CAMERA), ("14d-function-calling-aws-video.py", EVAL_VISION_CAMERA), diff --git a/src/pipecat/services/nebius/__init__.py b/src/pipecat/services/nebius/__init__.py index 8be606de9..e69de29bb 100644 --- a/src/pipecat/services/nebius/__init__.py +++ b/src/pipecat/services/nebius/__init__.py @@ -1,13 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -import sys - -from pipecat.services import DeprecatedModuleProxy - -from .llm import * - -sys.modules[__name__] = DeprecatedModuleProxy(globals(), "nebius", "nebius.llm") diff --git a/src/pipecat/services/nebius/llm.py b/src/pipecat/services/nebius/llm.py index 1efe8df45..aa26e776f 100644 --- a/src/pipecat/services/nebius/llm.py +++ b/src/pipecat/services/nebius/llm.py @@ -4,7 +4,7 @@ # SPDX-License-Identifier: BSD 2-Clause License # -"""Nebius Token Factory LLM service implementation using OpenAI-compatible interface.""" +"""Nebius LLM service implementation using OpenAI-compatible interface.""" from dataclasses import dataclass from typing import Optional @@ -23,26 +23,16 @@ class NebiusLLMSettings(BaseOpenAILLMService.Settings): class NebiusLLMService(OpenAILLMService): - """A service for interacting with Nebius Token Factory's API using the OpenAI-compatible interface. + """A service for interacting with Nebius's API using the OpenAI-compatible interface. - This service extends OpenAILLMService to connect to Nebius Token Factory's API endpoint - while maintaining full compatibility with OpenAI's interface and functionality. - - Nebius Token Factory provides access to open-source models including Meta Llama, - Qwen, and DeepSeek variants through an OpenAI-compatible REST API. - - Set the ``NEBIUS_API_KEY`` environment variable or pass ``api_key`` directly. - - Example:: - - service = NebiusLLMService( - api_key="your-nebius-api-key", - settings=NebiusLLMService.Settings( - model="meta-llama/Meta-Llama-3.1-70B-Instruct", - ), - ) + This service extends OpenAILLMService to connect to Nebius's API endpoint while + maintaining full compatibility with OpenAI's interface and functionality. """ + # Nebius doesn't support the "developer" message role. + # This value is used by BaseOpenAILLMService when calling the adapter. + supports_developer_role = False + Settings = NebiusLLMSettings _settings: Settings @@ -51,39 +41,32 @@ class NebiusLLMService(OpenAILLMService): *, api_key: str, base_url: str = "https://api.tokenfactory.nebius.com/v1/", - model: Optional[str] = None, settings: Optional[Settings] = None, **kwargs, ): - """Initialize the Nebius Token Factory LLM service. + """Initialize the Nebius LLM service. Args: - api_key: The API key for accessing Nebius Token Factory's API. + api_key: The API key for accessing Nebius's API. base_url: The base URL for the Nebius API. Defaults to ``"https://api.tokenfactory.nebius.com/v1/"``. - model: The model identifier to use. Defaults to - ``"meta-llama/Meta-Llama-3.1-8B-Instruct"``. - - .. deprecated:: 0.0.109 - Use ``settings=NebiusLLMService.Settings(model=...)`` instead. - settings: Runtime-updatable settings. When provided alongside deprecated parameters, ``settings`` values take precedence. **kwargs: Additional keyword arguments passed to OpenAILLMService. """ - default_settings = self.Settings(model="meta-llama/Meta-Llama-3.1-8B-Instruct") - - if model is not None: - self._warn_init_param_moved_to_settings("model", "model") - default_settings.model = model + # Initialize default_settings with hardcoded defaults + default_settings = self.Settings( + model="openai/gpt-oss-120b", + ) + # Apply settings delta (canonical API, always wins) if settings is not None: default_settings.apply_update(settings) super().__init__(api_key=api_key, base_url=base_url, settings=default_settings, **kwargs) def create_client(self, api_key=None, base_url=None, **kwargs): - """Create OpenAI-compatible client for Nebius Token Factory API endpoint. + """Create OpenAI-compatible client for Nebius API endpoint. Args: api_key: The API key for authentication. If None, uses instance default. @@ -91,7 +74,7 @@ class NebiusLLMService(OpenAILLMService): **kwargs: Additional keyword arguments for client configuration. Returns: - An OpenAI-compatible client configured for Nebius Token Factory's API. + An OpenAI-compatible client configured for Nebius's API. """ logger.debug(f"Creating Nebius client with api {base_url}") return super().create_client(api_key, base_url, **kwargs) diff --git a/src/pipecat/services/sarvam/llm.py b/src/pipecat/services/sarvam/llm.py index c1fc6990e..ce353ff2b 100644 --- a/src/pipecat/services/sarvam/llm.py +++ b/src/pipecat/services/sarvam/llm.py @@ -42,6 +42,10 @@ class SarvamLLMService(OpenAILLMService): maintaining full compatibility with OpenAI's interface and functionality. """ + # Sarvam doesn't support the "developer" message role. + # This value is used by BaseOpenAILLMService when calling the adapter. + supports_developer_role = False + _SUPPORTED_MODELS = frozenset( {"sarvam-30b", "sarvam-30b-16k", "sarvam-105b", "sarvam-105b-32k"} )