Add NebiusLLMService with developer role and tool support fixes

- Add Nebius LLM service wrapping OpenAI-compatible Token Factory API
- Set supports_developer_role = False (Nebius rejects developer role)
- Default to openai/gpt-oss-120b model (supports function calling)
- Add Nebius function-calling example and env.example entry
- Fix Sarvam developer role support
- Update examples to use developer role for intro messages
2026-03-29 08:50:01 -04:00
parent 39919f7889
commit 63254fe337
12 changed files with 216 additions and 53 deletions
--- a/env.example
+++ b/env.example
@@ -121,6 +121,9 @@ MINIMAX_GROUP_ID=...
 # Mistral
 MISTRAL_API_KEY=...

+# Nebius
+NEBIUS_API_KEY=...
+
 # Neuphonic
 NEUPHONIC_API_KEY=...

--- a/examples/foundational/07z-interruptible-sarvam-http.py
+++ b/examples/foundational/07z-interruptible-sarvam-http.py
@@ -111,7 +111,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            logger.info(f"Client connected")
            # Kick off the conversation.
            context.add_message(
-                {"role": "user", "content": "Please introduce yourself to the user."}
+                {"role": "developer", "content": "Please introduce yourself to the user."}
            )
            await task.queue_frames([LLMRunFrame()])

--- a/examples/foundational/07z-interruptible-sarvam.py
+++ b/examples/foundational/07z-interruptible-sarvam.py
@@ -104,7 +104,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    async def on_client_connected(transport, client):
        logger.info(f"Client connected")
        # Kick off the conversation.
-        context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
+        context.add_message(
+            {"role": "developer", "content": "Please introduce yourself to the user."}
+        )
        await task.queue_frames([LLMRunFrame()])

        # Optionally, you can wait for 30 seconds and then change the voice.
--- a/examples/foundational/14-function-calling.py
+++ b/examples/foundational/14-function-calling.py
@@ -148,6 +148,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    async def on_client_connected(transport, client):
        logger.info(f"Client connected")
        # Kick off the conversation.
+        context.add_message(
+            {"role": "developer", "content": "Please introduce yourself to the user."}
+        )
        await task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
--- a/examples/foundational/14b-function-calling-openai.py
+++ b/examples/foundational/14b-function-calling-openai.py
--- a/examples/foundational/14c-function-calling-together.py
+++ b/examples/foundational/14c-function-calling-together.py
@@ -131,6 +131,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    async def on_client_connected(transport, client):
        logger.info(f"Client connected")
        # Kick off the conversation.
+        context.add_message(
+            {"role": "developer", "content": "Please introduce yourself to the user."}
+        )
        await task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
--- a/examples/foundational/14v-function-calling-nebius.py
+++ b/examples/foundational/14v-function-calling-nebius.py
@@ -0,0 +1,175 @@
+#
+# Copyright (c) 2024-2026, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.adapters.schemas.function_schema import FunctionSchema
+from pipecat.adapters.schemas.tools_schema import ToolsSchema
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.frames.frames import LLMRunFrame, TTSSpeakFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import (
+    LLMContextAggregatorPair,
+    LLMUserAggregatorParams,
+)
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.cartesia.tts import CartesiaTTSService
+from pipecat.services.deepgram.stt import DeepgramSTTService
+from pipecat.services.llm_service import FunctionCallParams
+from pipecat.services.nebius.llm import NebiusLLMService
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+
+async def fetch_weather_from_api(params: FunctionCallParams):
+    await params.result_callback({"conditions": "nice", "temperature": "75"})
+
+
+async def fetch_restaurant_recommendation(params: FunctionCallParams):
+    await params.result_callback({"name": "The Golden Dragon"})
+
+
+# We use lambdas to defer transport parameter creation until the transport
+# type is selected at runtime.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+
+    tts = CartesiaTTSService(
+        api_key=os.getenv("CARTESIA_API_KEY"),
+        settings=CartesiaTTSService.Settings(
+            voice="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
+        ),
+    )
+
+    llm = NebiusLLMService(
+        api_key=os.getenv("NEBIUS_API_KEY"),
+        settings=NebiusLLMService.Settings(
+            system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
+        ),
+    )
+
+    # You can also register a function_name of None to get all functions
+    # sent to the same callback with an additional function_name parameter.
+    llm.register_function("get_current_weather", fetch_weather_from_api)
+    llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation)
+
+    @llm.event_handler("on_function_calls_started")
+    async def on_function_calls_started(service, function_calls):
+        await tts.queue_frame(TTSSpeakFrame("Let me check on that."))
+
+    weather_function = FunctionSchema(
+        name="get_current_weather",
+        description="Get the current weather",
+        properties={
+            "location": {
+                "type": "string",
+                "description": "The city and state, e.g. San Francisco, CA",
+            },
+            "format": {
+                "type": "string",
+                "enum": ["celsius", "fahrenheit"],
+                "description": "The temperature unit to use. Infer this from the user's location.",
+            },
+        },
+        required=["location", "format"],
+    )
+    restaurant_function = FunctionSchema(
+        name="get_restaurant_recommendation",
+        description="Get a restaurant recommendation",
+        properties={
+            "location": {
+                "type": "string",
+                "description": "The city and state, e.g. San Francisco, CA",
+            },
+        },
+        required=["location"],
+    )
+    tools = ToolsSchema(standard_tools=[weather_function, restaurant_function])
+
+    context = LLMContext(tools=tools)
+    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
+        context,
+        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
+    )
+
+    pipeline = Pipeline(
+        [
+            transport.input(),
+            stt,
+            user_aggregator,
+            llm,
+            tts,
+            transport.output(),
+            assistant_aggregator,
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+        # Kick off the conversation.
+        context.add_message(
+            {"role": "developer", "content": "Please introduce yourself to the user."}
+        )
+        await task.queue_frames([LLMRunFrame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/14y-function-calling-sarvam.py
+++ b/examples/foundational/14y-function-calling-sarvam.py
@@ -153,7 +153,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    async def on_client_connected(transport, client):
        logger.info(f"Client connected")
        # Kick off the conversation.
-        context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
+        context.add_message(
+            {"role": "developer", "content": "Please introduce yourself to the user."}
+        )
        await task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
--- a/scripts/evals/run-release-evals.py
+++ b/scripts/evals/run-release-evals.py
@@ -169,8 +169,11 @@ TESTS_12 = [
 TESTS_14 = [
    ("14-function-calling.py", EVAL_WEATHER),
    ("14-function-calling.py", EVAL_WEATHER_AND_RESTAURANT),
+    ("14-function-calling-openai-responses.py", EVAL_WEATHER),
+    ("14-function-calling-openai-responses.py", EVAL_WEATHER_AND_RESTAURANT),
    ("14a-function-calling-anthropic.py", EVAL_WEATHER),
    ("14a-function-calling-anthropic.py", EVAL_WEATHER_AND_RESTAURANT),
+    ("14b-function-calling-openai.py", EVAL_WEATHER),
    ("14e-function-calling-google.py", EVAL_WEATHER),
    ("14e-function-calling-google.py", EVAL_WEATHER_AND_RESTAURANT),
    ("14f-function-calling-groq.py", EVAL_WEATHER),
@@ -186,13 +189,11 @@ TESTS_14 = [
    ("14r-function-calling-aws.py", EVAL_WEATHER),
    ("14s-function-calling-sambanova.py", EVAL_WEATHER),
    ("14r-function-calling-aws.py", EVAL_WEATHER_AND_RESTAURANT),
-    ("14v-function-calling-openai.py", EVAL_WEATHER),
+    ("14v-function-calling-nebius.py", EVAL_WEATHER),
    ("14w-function-calling-mistral.py", EVAL_WEATHER),
    ("14x-function-calling-openpipe.py", EVAL_WEATHER),
    ("14y-function-calling-sarvam.py", EVAL_WEATHER),
    ("14z-function-calling-novita.py", EVAL_WEATHER),
-    ("14-function-calling-openai-responses.py", EVAL_WEATHER),
-    ("14-function-calling-openai-responses.py", EVAL_WEATHER_AND_RESTAURANT),
    # Video
    ("14d-function-calling-anthropic-video.py", EVAL_VISION_CAMERA),
    ("14d-function-calling-aws-video.py", EVAL_VISION_CAMERA),
--- a/src/pipecat/services/nebius/__init__.py
+++ b/src/pipecat/services/nebius/__init__.py
@@ -1,13 +0,0 @@
-#
-# Copyright (c) 2024-2026, Daily
-#
-# SPDX-License-Identifier: BSD 2-Clause License
-#
-
-import sys
-
-from pipecat.services import DeprecatedModuleProxy
-
-from .llm import *
-
-sys.modules[__name__] = DeprecatedModuleProxy(globals(), "nebius", "nebius.llm")
--- a/src/pipecat/services/nebius/llm.py
+++ b/src/pipecat/services/nebius/llm.py
@@ -4,7 +4,7 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #

-"""Nebius Token Factory LLM service implementation using OpenAI-compatible interface."""
+"""Nebius LLM service implementation using OpenAI-compatible interface."""

 from dataclasses import dataclass
 from typing import Optional
@@ -23,26 +23,16 @@ class NebiusLLMSettings(BaseOpenAILLMService.Settings):


 class NebiusLLMService(OpenAILLMService):
-    """A service for interacting with Nebius Token Factory's API using the OpenAI-compatible interface.
+    """A service for interacting with Nebius's API using the OpenAI-compatible interface.

-    This service extends OpenAILLMService to connect to Nebius Token Factory's API endpoint
-    while maintaining full compatibility with OpenAI's interface and functionality.
-
-    Nebius Token Factory provides access to open-source models including Meta Llama,
-    Qwen, and DeepSeek variants through an OpenAI-compatible REST API.
-
-    Set the ``NEBIUS_API_KEY`` environment variable or pass ``api_key`` directly.
-
-    Example::
-
-        service = NebiusLLMService(
-            api_key="your-nebius-api-key",
-            settings=NebiusLLMService.Settings(
-                model="meta-llama/Meta-Llama-3.1-70B-Instruct",
-            ),
-        )
+    This service extends OpenAILLMService to connect to Nebius's API endpoint while
+    maintaining full compatibility with OpenAI's interface and functionality.
    """

+    # Nebius doesn't support the "developer" message role.
+    # This value is used by BaseOpenAILLMService when calling the adapter.
+    supports_developer_role = False
+
    Settings = NebiusLLMSettings
    _settings: Settings

@@ -51,39 +41,32 @@ class NebiusLLMService(OpenAILLMService):
        *,
        api_key: str,
        base_url: str = "https://api.tokenfactory.nebius.com/v1/",
-        model: Optional[str] = None,
        settings: Optional[Settings] = None,
        **kwargs,
    ):
-        """Initialize the Nebius Token Factory LLM service.
+        """Initialize the Nebius LLM service.

        Args:
-            api_key: The API key for accessing Nebius Token Factory's API.
+            api_key: The API key for accessing Nebius's API.
            base_url: The base URL for the Nebius API. Defaults to
                ``"https://api.tokenfactory.nebius.com/v1/"``.
-            model: The model identifier to use. Defaults to
-                ``"meta-llama/Meta-Llama-3.1-8B-Instruct"``.
-
-                .. deprecated:: 0.0.109
-                    Use ``settings=NebiusLLMService.Settings(model=...)`` instead.
-
            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Additional keyword arguments passed to OpenAILLMService.
        """
-        default_settings = self.Settings(model="meta-llama/Meta-Llama-3.1-8B-Instruct")
-
-        if model is not None:
-            self._warn_init_param_moved_to_settings("model", "model")
-            default_settings.model = model
+        # Initialize default_settings with hardcoded defaults
+        default_settings = self.Settings(
+            model="openai/gpt-oss-120b",
+        )

+        # Apply settings delta (canonical API, always wins)
        if settings is not None:
            default_settings.apply_update(settings)

        super().__init__(api_key=api_key, base_url=base_url, settings=default_settings, **kwargs)

    def create_client(self, api_key=None, base_url=None, **kwargs):
-        """Create OpenAI-compatible client for Nebius Token Factory API endpoint.
+        """Create OpenAI-compatible client for Nebius API endpoint.

        Args:
            api_key: The API key for authentication. If None, uses instance default.
@@ -91,7 +74,7 @@ class NebiusLLMService(OpenAILLMService):
            **kwargs: Additional keyword arguments for client configuration.

        Returns:
-            An OpenAI-compatible client configured for Nebius Token Factory's API.
+            An OpenAI-compatible client configured for Nebius's API.
        """
        logger.debug(f"Creating Nebius client with api {base_url}")
        return super().create_client(api_key, base_url, **kwargs)
--- a/src/pipecat/services/sarvam/llm.py
+++ b/src/pipecat/services/sarvam/llm.py
@@ -42,6 +42,10 @@ class SarvamLLMService(OpenAILLMService):
    maintaining full compatibility with OpenAI's interface and functionality.
    """

+    # Sarvam doesn't support the "developer" message role.
+    # This value is used by BaseOpenAILLMService when calling the adapter.
+    supports_developer_role = False
+
    _SUPPORTED_MODELS = frozenset(
        {"sarvam-30b", "sarvam-30b-16k", "sarvam-105b", "sarvam-105b-32k"}
    )