Add NebiusLLMService with developer role and tool support fixes
- Add Nebius LLM service wrapping OpenAI-compatible Token Factory API
- Set supports_developer_role = False (Nebius rejects developer role)
- Default to openai/gpt-oss-120b model (supports function calling)
- Add Nebius function-calling example and env.example entry
- Fix Sarvam developer role support
- Update examples to use developer role for intro messages
This commit is contained in:
@@ -121,6 +121,9 @@ MINIMAX_GROUP_ID=...
|
||||
# Mistral
|
||||
MISTRAL_API_KEY=...
|
||||
|
||||
# Nebius
|
||||
NEBIUS_API_KEY=...
|
||||
|
||||
# Neuphonic
|
||||
NEUPHONIC_API_KEY=...
|
||||
|
||||
|
||||
@@ -111,7 +111,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message(
|
||||
{"role": "user", "content": "Please introduce yourself to the user."}
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
|
||||
@@ -104,7 +104,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
# Optionally, you can wait for 30 seconds and then change the voice.
|
||||
|
||||
@@ -148,6 +148,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -131,6 +131,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
175
examples/foundational/14v-function-calling-nebius.py
Normal file
175
examples/foundational/14v-function-calling-nebius.py
Normal file
@@ -0,0 +1,175 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.nebius.llm import NebiusLLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
async def fetch_weather_from_api(params: FunctionCallParams):
    """Handle the ``get_current_weather`` tool call with a canned weather report.

    Args:
        params: Function-call parameters; only ``result_callback`` is used,
            to hand the result back to the LLM service.
    """
    # Static demo data: no real weather API is queried.
    await params.result_callback({"conditions": "nice", "temperature": "75"})
|
||||
|
||||
|
||||
async def fetch_restaurant_recommendation(params: FunctionCallParams):
    """Handle the ``get_restaurant_recommendation`` tool call with a fixed answer.

    Args:
        params: Function-call parameters; only ``result_callback`` is used,
            to hand the result back to the LLM service.
    """
    # Static demo data: the same restaurant is returned for every request.
    await params.result_callback({"name": "The Golden Dragon"})
|
||||
|
||||
|
||||
# We use lambdas to defer transport parameter creation until the transport
# type is selected at runtime.
transport_params = {
    "daily": lambda: DailyParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
    ),
    "twilio": lambda: FastAPIWebsocketParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
    ),
    "webrtc": lambda: TransportParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
    ),
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Build and run the Nebius function-calling voice pipeline.

    Chains Deepgram STT, the Nebius LLM (with two registered tool handlers)
    and Cartesia TTS between the transport's input and output, then runs the
    resulting task until the pipeline finishes or the client disconnects.

    Args:
        transport: Transport supplying the pipeline's input and output
            processors and the client connect/disconnect events.
        runner_args: Runner arguments; provides the pipeline idle timeout and
            the SIGINT-handling flag for the runner.
    """
    logger.info("Starting bot")

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        settings=CartesiaTTSService.Settings(
            voice="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
        ),
    )

    llm = NebiusLLMService(
        api_key=os.getenv("NEBIUS_API_KEY"),
        settings=NebiusLLMService.Settings(
            system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
        ),
    )

    # You can also register a function_name of None to get all functions
    # sent to the same callback with an additional function_name parameter.
    llm.register_function("get_current_weather", fetch_weather_from_api)
    llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation)

    @llm.event_handler("on_function_calls_started")
    async def on_function_calls_started(service, function_calls):
        # Speak a short filler phrase while the tool call is being handled.
        await tts.queue_frame(TTSSpeakFrame("Let me check on that."))

    weather_function = FunctionSchema(
        name="get_current_weather",
        description="Get the current weather",
        properties={
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            },
            "format": {
                "type": "string",
                "enum": ["celsius", "fahrenheit"],
                "description": "The temperature unit to use. Infer this from the user's location.",
            },
        },
        required=["location", "format"],
    )
    restaurant_function = FunctionSchema(
        name="get_restaurant_recommendation",
        description="Get a restaurant recommendation",
        properties={
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            },
        },
        required=["location"],
    )
    tools = ToolsSchema(standard_tools=[weather_function, restaurant_function])

    context = LLMContext(tools=tools)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
    )

    pipeline = Pipeline(
        [
            transport.input(),
            stt,
            user_aggregator,
            llm,
            tts,
            transport.output(),
            assistant_aggregator,
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Kick off the conversation.
        context.add_message(
            {"role": "developer", "content": "Please introduce yourself to the user."}
        )
        await task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

    await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
    """Main bot entry point compatible with Pipecat Cloud.

    Creates the transport from ``runner_args`` and the ``transport_params``
    factories, then hands it to ``run_bot``.

    Args:
        runner_args: Runner arguments passed to both ``create_transport`` and
            ``run_bot``.
    """
    transport = await create_transport(runner_args, transport_params)
    await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Imported here so the runner is only loaded when executed as a script.
    from pipecat.runner.run import main

    main()
|
||||
@@ -153,7 +153,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -169,8 +169,11 @@ TESTS_12 = [
|
||||
TESTS_14 = [
|
||||
("14-function-calling.py", EVAL_WEATHER),
|
||||
("14-function-calling.py", EVAL_WEATHER_AND_RESTAURANT),
|
||||
("14-function-calling-openai-responses.py", EVAL_WEATHER),
|
||||
("14-function-calling-openai-responses.py", EVAL_WEATHER_AND_RESTAURANT),
|
||||
("14a-function-calling-anthropic.py", EVAL_WEATHER),
|
||||
("14a-function-calling-anthropic.py", EVAL_WEATHER_AND_RESTAURANT),
|
||||
("14b-function-calling-openai.py", EVAL_WEATHER),
|
||||
("14e-function-calling-google.py", EVAL_WEATHER),
|
||||
("14e-function-calling-google.py", EVAL_WEATHER_AND_RESTAURANT),
|
||||
("14f-function-calling-groq.py", EVAL_WEATHER),
|
||||
@@ -186,13 +189,11 @@ TESTS_14 = [
|
||||
("14r-function-calling-aws.py", EVAL_WEATHER),
|
||||
("14s-function-calling-sambanova.py", EVAL_WEATHER),
|
||||
("14r-function-calling-aws.py", EVAL_WEATHER_AND_RESTAURANT),
|
||||
("14v-function-calling-openai.py", EVAL_WEATHER),
|
||||
("14v-function-calling-nebius.py", EVAL_WEATHER),
|
||||
("14w-function-calling-mistral.py", EVAL_WEATHER),
|
||||
("14x-function-calling-openpipe.py", EVAL_WEATHER),
|
||||
("14y-function-calling-sarvam.py", EVAL_WEATHER),
|
||||
("14z-function-calling-novita.py", EVAL_WEATHER),
|
||||
("14-function-calling-openai-responses.py", EVAL_WEATHER),
|
||||
("14-function-calling-openai-responses.py", EVAL_WEATHER_AND_RESTAURANT),
|
||||
# Video
|
||||
("14d-function-calling-anthropic-video.py", EVAL_VISION_CAMERA),
|
||||
("14d-function-calling-aws-video.py", EVAL_VISION_CAMERA),
|
||||
|
||||
@@ -1,13 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import sys
|
||||
|
||||
from pipecat.services import DeprecatedModuleProxy
|
||||
|
||||
from .llm import *
|
||||
|
||||
sys.modules[__name__] = DeprecatedModuleProxy(globals(), "nebius", "nebius.llm")
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Nebius Token Factory LLM service implementation using OpenAI-compatible interface."""
|
||||
"""Nebius LLM service implementation using OpenAI-compatible interface."""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
@@ -23,26 +23,16 @@ class NebiusLLMSettings(BaseOpenAILLMService.Settings):
|
||||
|
||||
|
||||
class NebiusLLMService(OpenAILLMService):
|
||||
"""A service for interacting with Nebius Token Factory's API using the OpenAI-compatible interface.
|
||||
"""A service for interacting with Nebius's API using the OpenAI-compatible interface.
|
||||
|
||||
This service extends OpenAILLMService to connect to Nebius Token Factory's API endpoint
|
||||
while maintaining full compatibility with OpenAI's interface and functionality.
|
||||
|
||||
Nebius Token Factory provides access to open-source models including Meta Llama,
|
||||
Qwen, and DeepSeek variants through an OpenAI-compatible REST API.
|
||||
|
||||
Set the ``NEBIUS_API_KEY`` environment variable or pass ``api_key`` directly.
|
||||
|
||||
Example::
|
||||
|
||||
service = NebiusLLMService(
|
||||
api_key="your-nebius-api-key",
|
||||
settings=NebiusLLMService.Settings(
|
||||
model="meta-llama/Meta-Llama-3.1-70B-Instruct",
|
||||
),
|
||||
)
|
||||
This service extends OpenAILLMService to connect to Nebius's API endpoint while
|
||||
maintaining full compatibility with OpenAI's interface and functionality.
|
||||
"""
|
||||
|
||||
# Nebius doesn't support the "developer" message role.
|
||||
# This value is used by BaseOpenAILLMService when calling the adapter.
|
||||
supports_developer_role = False
|
||||
|
||||
Settings = NebiusLLMSettings
|
||||
_settings: Settings
|
||||
|
||||
@@ -51,39 +41,32 @@ class NebiusLLMService(OpenAILLMService):
|
||||
*,
|
||||
api_key: str,
|
||||
base_url: str = "https://api.tokenfactory.nebius.com/v1/",
|
||||
model: Optional[str] = None,
|
||||
settings: Optional[Settings] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""Initialize the Nebius Token Factory LLM service.
|
||||
"""Initialize the Nebius LLM service.
|
||||
|
||||
Args:
|
||||
api_key: The API key for accessing Nebius Token Factory's API.
|
||||
api_key: The API key for accessing Nebius's API.
|
||||
base_url: The base URL for the Nebius API. Defaults to
|
||||
``"https://api.tokenfactory.nebius.com/v1/"``.
|
||||
model: The model identifier to use. Defaults to
|
||||
``"meta-llama/Meta-Llama-3.1-8B-Instruct"``.
|
||||
|
||||
.. deprecated:: 0.0.109
|
||||
Use ``settings=NebiusLLMService.Settings(model=...)`` instead.
|
||||
|
||||
settings: Runtime-updatable settings. When provided alongside deprecated
|
||||
parameters, ``settings`` values take precedence.
|
||||
**kwargs: Additional keyword arguments passed to OpenAILLMService.
|
||||
"""
|
||||
default_settings = self.Settings(model="meta-llama/Meta-Llama-3.1-8B-Instruct")
|
||||
|
||||
if model is not None:
|
||||
self._warn_init_param_moved_to_settings("model", "model")
|
||||
default_settings.model = model
|
||||
# Initialize default_settings with hardcoded defaults
|
||||
default_settings = self.Settings(
|
||||
model="openai/gpt-oss-120b",
|
||||
)
|
||||
|
||||
# Apply settings delta (canonical API, always wins)
|
||||
if settings is not None:
|
||||
default_settings.apply_update(settings)
|
||||
|
||||
super().__init__(api_key=api_key, base_url=base_url, settings=default_settings, **kwargs)
|
||||
|
||||
def create_client(self, api_key=None, base_url=None, **kwargs):
|
||||
"""Create OpenAI-compatible client for Nebius Token Factory API endpoint.
|
||||
"""Create OpenAI-compatible client for Nebius API endpoint.
|
||||
|
||||
Args:
|
||||
api_key: The API key for authentication. If None, uses instance default.
|
||||
@@ -91,7 +74,7 @@ class NebiusLLMService(OpenAILLMService):
|
||||
**kwargs: Additional keyword arguments for client configuration.
|
||||
|
||||
Returns:
|
||||
An OpenAI-compatible client configured for Nebius Token Factory's API.
|
||||
An OpenAI-compatible client configured for Nebius's API.
|
||||
"""
|
||||
logger.debug(f"Creating Nebius client with api {base_url}")
|
||||
return super().create_client(api_key, base_url, **kwargs)
|
||||
|
||||
@@ -42,6 +42,10 @@ class SarvamLLMService(OpenAILLMService):
|
||||
maintaining full compatibility with OpenAI's interface and functionality.
|
||||
"""
|
||||
|
||||
# Sarvam doesn't support the "developer" message role.
|
||||
# This value is used by BaseOpenAILLMService when calling the adapter.
|
||||
supports_developer_role = False
|
||||
|
||||
_SUPPORTED_MODELS = frozenset(
|
||||
{"sarvam-30b", "sarvam-30b-16k", "sarvam-105b", "sarvam-105b-32k"}
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user