Add NebiusLLMService with developer role and tool support fixes

- Add Nebius LLM service wrapping OpenAI-compatible Token Factory API
- Set supports_developer_role = False (Nebius rejects developer role)
- Default to openai/gpt-oss-120b model (supports function calling)
- Add Nebius function-calling example and env.example entry
- Fix Sarvam developer role support (set supports_developer_role = False, since Sarvam rejects the developer role)
- Update examples to use developer role for intro messages
This commit is contained in:
Mark Backman
2026-03-29 08:50:01 -04:00
parent 39919f7889
commit 63254fe337
12 changed files with 216 additions and 53 deletions

View File

@@ -121,6 +121,9 @@ MINIMAX_GROUP_ID=...
# Mistral
MISTRAL_API_KEY=...
# Nebius
NEBIUS_API_KEY=...
# Neuphonic
NEUPHONIC_API_KEY=...

View File

@@ -111,7 +111,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Client connected")
# Kick off the conversation.
context.add_message(
{"role": "user", "content": "Please introduce yourself to the user."}
{"role": "developer", "content": "Please introduce yourself to the user."}
)
await task.queue_frames([LLMRunFrame()])

View File

@@ -104,7 +104,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
async def on_client_connected(transport, client):
logger.info(f"Client connected")
# Kick off the conversation.
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
context.add_message(
{"role": "developer", "content": "Please introduce yourself to the user."}
)
await task.queue_frames([LLMRunFrame()])
# Optionally, you can wait for 30 seconds and then change the voice.

View File

@@ -148,6 +148,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
async def on_client_connected(transport, client):
logger.info(f"Client connected")
# Kick off the conversation.
context.add_message(
{"role": "developer", "content": "Please introduce yourself to the user."}
)
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")

View File

@@ -131,6 +131,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
async def on_client_connected(transport, client):
logger.info(f"Client connected")
# Kick off the conversation.
context.add_message(
{"role": "developer", "content": "Please introduce yourself to the user."}
)
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")

View File

@@ -0,0 +1,175 @@
#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import os
from dotenv import load_dotenv
from loguru import logger
from pipecat.adapters.schemas.function_schema import FunctionSchema
from pipecat.adapters.schemas.tools_schema import ToolsSchema
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame, TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.aggregators.llm_response_universal import (
LLMContextAggregatorPair,
LLMUserAggregatorParams,
)
from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.services.llm_service import FunctionCallParams
from pipecat.services.nebius.llm import NebiusLLMService
from pipecat.transports.base_transport import BaseTransport, TransportParams
from pipecat.transports.daily.transport import DailyParams
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
load_dotenv(override=True)
async def fetch_weather_from_api(params: FunctionCallParams):
    """Answer a weather function call with a fixed, canned report.

    Args:
        params: Function-call parameters. Only ``result_callback`` is used;
            it is awaited with the canned result.
    """
    # Static sample data: this example performs no real weather lookup.
    canned_report = {"conditions": "nice", "temperature": "75"}
    await params.result_callback(canned_report)
async def fetch_restaurant_recommendation(params: FunctionCallParams):
    """Answer a restaurant function call with a fixed recommendation.

    Args:
        params: Function-call parameters. Only ``result_callback`` is used;
            it is awaited with the canned result.
    """
    # Static sample data: the same restaurant is returned for every call.
    recommendation = {"name": "The Golden Dragon"}
    await params.result_callback(recommendation)
# Each entry is a zero-argument factory rather than a ready-made params
# object, so the params are only built once the transport type has been
# selected at runtime.
transport_params = {
    "daily": lambda: DailyParams(audio_in_enabled=True, audio_out_enabled=True),
    "twilio": lambda: FastAPIWebsocketParams(audio_in_enabled=True, audio_out_enabled=True),
    "webrtc": lambda: TransportParams(audio_in_enabled=True, audio_out_enabled=True),
}
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Build and run the Nebius function-calling voice pipeline.

    Wires Deepgram STT, the Nebius LLM (with two registered tool handlers)
    and Cartesia TTS between the transport's input and output, then runs the
    resulting task with a ``PipelineRunner``.

    Args:
        transport: Transport that supplies the pipeline's input and output
            processors and the client connect/disconnect events.
        runner_args: Runner arguments; provides the pipeline idle timeout
            and the SIGINT-handling flag.
    """
    logger.info(f"Starting bot")

    # --- Services -----------------------------------------------------
    deepgram_key = os.getenv("DEEPGRAM_API_KEY")
    transcriber = DeepgramSTTService(api_key=deepgram_key)

    cartesia_key = os.getenv("CARTESIA_API_KEY")
    voice_settings = CartesiaTTSService.Settings(
        voice="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )
    speaker = CartesiaTTSService(api_key=cartesia_key, settings=voice_settings)

    nebius_key = os.getenv("NEBIUS_API_KEY")
    llm_settings = NebiusLLMService.Settings(
        system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
    )
    assistant_llm = NebiusLLMService(api_key=nebius_key, settings=llm_settings)

    # --- Tool handlers ------------------------------------------------
    # You can also register a function_name of None to get all functions
    # sent to the same callback with an additional function_name parameter.
    tool_handlers = {
        "get_current_weather": fetch_weather_from_api,
        "get_restaurant_recommendation": fetch_restaurant_recommendation,
    }
    for tool_name, handler in tool_handlers.items():
        assistant_llm.register_function(tool_name, handler)

    @assistant_llm.event_handler("on_function_calls_started")
    async def on_function_calls_started(service, function_calls):
        # Speak a short filler phrase when function calls start.
        filler = TTSSpeakFrame("Let me check on that.")
        await speaker.queue_frame(filler)

    # --- Tool schemas advertised to the LLM ---------------------------
    weather_tool = FunctionSchema(
        name="get_current_weather",
        description="Get the current weather",
        properties={
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            },
            "format": {
                "type": "string",
                "enum": ["celsius", "fahrenheit"],
                "description": "The temperature unit to use. Infer this from the user's location.",
            },
        },
        required=["location", "format"],
    )
    restaurant_tool = FunctionSchema(
        name="get_restaurant_recommendation",
        description="Get a restaurant recommendation",
        properties={
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            },
        },
        required=["location"],
    )

    # --- Context and aggregators --------------------------------------
    context = LLMContext(tools=ToolsSchema(standard_tools=[weather_tool, restaurant_tool]))
    aggregator_params = LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer())
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
        user_params=aggregator_params,
    )

    # --- Pipeline and task --------------------------------------------
    stages = [
        transport.input(),
        transcriber,
        user_aggregator,
        assistant_llm,
        speaker,
        transport.output(),
        assistant_aggregator,
    ]
    pipeline = Pipeline(stages)

    task_params = PipelineParams(enable_metrics=True, enable_usage_metrics=True)
    pipeline_task = PipelineTask(
        pipeline,
        params=task_params,
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    # --- Transport events ---------------------------------------------
    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info(f"Client connected")
        # Kick off the conversation.
        intro_request = {
            "role": "developer",
            "content": "Please introduce yourself to the user.",
        }
        context.add_message(intro_request)
        await pipeline_task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info(f"Client disconnected")
        await pipeline_task.cancel()

    await PipelineRunner(handle_sigint=runner_args.handle_sigint).run(pipeline_task)
async def bot(runner_args: RunnerArguments):
    """Main bot entry point compatible with Pipecat Cloud.

    Creates the transport from ``runner_args`` and the module-level
    ``transport_params`` factories, then hands it to ``run_bot``.

    Args:
        runner_args: Runner arguments forwarded to both ``create_transport``
            and ``run_bot``.
    """
    await run_bot(await create_transport(runner_args, transport_params), runner_args)
if __name__ == "__main__":
    # Script entry point: the runner's main() is imported only when this
    # file is executed directly, not when it is imported as a module.
    from pipecat.runner.run import main

    main()

View File

@@ -153,7 +153,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
async def on_client_connected(transport, client):
logger.info(f"Client connected")
# Kick off the conversation.
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
context.add_message(
{"role": "developer", "content": "Please introduce yourself to the user."}
)
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")

View File

@@ -169,8 +169,11 @@ TESTS_12 = [
TESTS_14 = [
("14-function-calling.py", EVAL_WEATHER),
("14-function-calling.py", EVAL_WEATHER_AND_RESTAURANT),
("14-function-calling-openai-responses.py", EVAL_WEATHER),
("14-function-calling-openai-responses.py", EVAL_WEATHER_AND_RESTAURANT),
("14a-function-calling-anthropic.py", EVAL_WEATHER),
("14a-function-calling-anthropic.py", EVAL_WEATHER_AND_RESTAURANT),
("14b-function-calling-openai.py", EVAL_WEATHER),
("14e-function-calling-google.py", EVAL_WEATHER),
("14e-function-calling-google.py", EVAL_WEATHER_AND_RESTAURANT),
("14f-function-calling-groq.py", EVAL_WEATHER),
@@ -186,13 +189,11 @@ TESTS_14 = [
("14r-function-calling-aws.py", EVAL_WEATHER),
("14s-function-calling-sambanova.py", EVAL_WEATHER),
("14r-function-calling-aws.py", EVAL_WEATHER_AND_RESTAURANT),
("14v-function-calling-openai.py", EVAL_WEATHER),
("14v-function-calling-nebius.py", EVAL_WEATHER),
("14w-function-calling-mistral.py", EVAL_WEATHER),
("14x-function-calling-openpipe.py", EVAL_WEATHER),
("14y-function-calling-sarvam.py", EVAL_WEATHER),
("14z-function-calling-novita.py", EVAL_WEATHER),
("14-function-calling-openai-responses.py", EVAL_WEATHER),
("14-function-calling-openai-responses.py", EVAL_WEATHER_AND_RESTAURANT),
# Video
("14d-function-calling-anthropic-video.py", EVAL_VISION_CAMERA),
("14d-function-calling-aws-video.py", EVAL_VISION_CAMERA),

View File

@@ -1,13 +0,0 @@
#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import sys
from pipecat.services import DeprecatedModuleProxy
from .llm import *
sys.modules[__name__] = DeprecatedModuleProxy(globals(), "nebius", "nebius.llm")

View File

@@ -4,7 +4,7 @@
# SPDX-License-Identifier: BSD 2-Clause License
#
"""Nebius Token Factory LLM service implementation using OpenAI-compatible interface."""
"""Nebius LLM service implementation using OpenAI-compatible interface."""
from dataclasses import dataclass
from typing import Optional
@@ -23,26 +23,16 @@ class NebiusLLMSettings(BaseOpenAILLMService.Settings):
class NebiusLLMService(OpenAILLMService):
"""A service for interacting with Nebius Token Factory's API using the OpenAI-compatible interface.
"""A service for interacting with Nebius's API using the OpenAI-compatible interface.
This service extends OpenAILLMService to connect to Nebius Token Factory's API endpoint
while maintaining full compatibility with OpenAI's interface and functionality.
Nebius Token Factory provides access to open-source models including Meta Llama,
Qwen, and DeepSeek variants through an OpenAI-compatible REST API.
Set the ``NEBIUS_API_KEY`` environment variable or pass ``api_key`` directly.
Example::
service = NebiusLLMService(
api_key="your-nebius-api-key",
settings=NebiusLLMService.Settings(
model="meta-llama/Meta-Llama-3.1-70B-Instruct",
),
)
This service extends OpenAILLMService to connect to Nebius's API endpoint while
maintaining full compatibility with OpenAI's interface and functionality.
"""
# Nebius doesn't support the "developer" message role.
# This value is used by BaseOpenAILLMService when calling the adapter.
supports_developer_role = False
Settings = NebiusLLMSettings
_settings: Settings
@@ -51,39 +41,32 @@ class NebiusLLMService(OpenAILLMService):
*,
api_key: str,
base_url: str = "https://api.tokenfactory.nebius.com/v1/",
model: Optional[str] = None,
settings: Optional[Settings] = None,
**kwargs,
):
"""Initialize the Nebius Token Factory LLM service.
"""Initialize the Nebius LLM service.
Args:
api_key: The API key for accessing Nebius Token Factory's API.
api_key: The API key for accessing Nebius's API.
base_url: The base URL for the Nebius API. Defaults to
``"https://api.tokenfactory.nebius.com/v1/"``.
model: The model identifier to use. Defaults to
``"meta-llama/Meta-Llama-3.1-8B-Instruct"``.
.. deprecated:: 0.0.109
Use ``settings=NebiusLLMService.Settings(model=...)`` instead.
settings: Runtime-updatable settings. When provided alongside deprecated
parameters, ``settings`` values take precedence.
**kwargs: Additional keyword arguments passed to OpenAILLMService.
"""
default_settings = self.Settings(model="meta-llama/Meta-Llama-3.1-8B-Instruct")
if model is not None:
self._warn_init_param_moved_to_settings("model", "model")
default_settings.model = model
# Initialize default_settings with hardcoded defaults
default_settings = self.Settings(
model="openai/gpt-oss-120b",
)
# Apply settings delta (canonical API, always wins)
if settings is not None:
default_settings.apply_update(settings)
super().__init__(api_key=api_key, base_url=base_url, settings=default_settings, **kwargs)
def create_client(self, api_key=None, base_url=None, **kwargs):
"""Create OpenAI-compatible client for Nebius Token Factory API endpoint.
"""Create OpenAI-compatible client for Nebius API endpoint.
Args:
api_key: The API key for authentication. If None, uses instance default.
@@ -91,7 +74,7 @@ class NebiusLLMService(OpenAILLMService):
**kwargs: Additional keyword arguments for client configuration.
Returns:
An OpenAI-compatible client configured for Nebius Token Factory's API.
An OpenAI-compatible client configured for Nebius's API.
"""
logger.debug(f"Creating Nebius client with api {base_url}")
return super().create_client(api_key, base_url, **kwargs)

View File

@@ -42,6 +42,10 @@ class SarvamLLMService(OpenAILLMService):
maintaining full compatibility with OpenAI's interface and functionality.
"""
# Sarvam doesn't support the "developer" message role.
# This value is used by BaseOpenAILLMService when calling the adapter.
supports_developer_role = False
_SUPPORTED_MODELS = frozenset(
{"sarvam-30b", "sarvam-30b-16k", "sarvam-105b", "sarvam-105b-32k"}
)