Merge pull request #4423 from joycech333/feat/inception-llm-service
feat: add Inception LLM service with Mercury 2 support
This commit is contained in:
@@ -92,7 +92,7 @@ Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.yout
|
|||||||
| Category | Services |
|
| Category | Services |
|
||||||
| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/api-reference/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/api-reference/server/services/stt/aws), [Azure](https://docs.pipecat.ai/api-reference/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/api-reference/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/api-reference/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/api-reference/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/api-reference/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/api-reference/server/services/stt/gladia), [Google](https://docs.pipecat.ai/api-reference/server/services/stt/google), [Gradium](https://docs.pipecat.ai/api-reference/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/api-reference/server/services/stt/groq), [Mistral](https://docs.pipecat.ai/api-reference/server/services/stt/mistral), [NVIDIA](https://docs.pipecat.ai/api-reference/server/services/stt/nvidia), [OpenAI (Whisper)](https://docs.pipecat.ai/api-reference/server/services/stt/openai), [Sarvam](https://docs.pipecat.ai/api-reference/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/api-reference/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/api-reference/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/api-reference/server/services/stt/whisper), [xAI](https://docs.pipecat.ai/api-reference/server/services/stt/xai) |
|
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/api-reference/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/api-reference/server/services/stt/aws), [Azure](https://docs.pipecat.ai/api-reference/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/api-reference/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/api-reference/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/api-reference/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/api-reference/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/api-reference/server/services/stt/gladia), [Google](https://docs.pipecat.ai/api-reference/server/services/stt/google), [Gradium](https://docs.pipecat.ai/api-reference/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/api-reference/server/services/stt/groq), [Mistral](https://docs.pipecat.ai/api-reference/server/services/stt/mistral), [NVIDIA](https://docs.pipecat.ai/api-reference/server/services/stt/nvidia), [OpenAI (Whisper)](https://docs.pipecat.ai/api-reference/server/services/stt/openai), [Sarvam](https://docs.pipecat.ai/api-reference/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/api-reference/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/api-reference/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/api-reference/server/services/stt/whisper), [xAI](https://docs.pipecat.ai/api-reference/server/services/stt/xai) |
|
||||||
| LLMs | [Anthropic](https://docs.pipecat.ai/api-reference/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/api-reference/server/services/llm/aws), [Azure](https://docs.pipecat.ai/api-reference/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/api-reference/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/api-reference/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/api-reference/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/api-reference/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/api-reference/server/services/llm/grok), [Groq](https://docs.pipecat.ai/api-reference/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/api-reference/server/services/llm/mistral), [Nebius](https://docs.pipecat.ai/api-reference/server/services/llm/nebius), [Novita](https://docs.pipecat.ai/api-reference/server/services/llm/novita), [NVIDIA NIM](https://docs.pipecat.ai/api-reference/server/services/llm/nvidia), [Ollama](https://docs.pipecat.ai/api-reference/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/api-reference/server/services/llm/openai), [OpenAI Responses](https://docs.pipecat.ai/api-reference/server/services/llm/openai-responses), [OpenRouter](https://docs.pipecat.ai/api-reference/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/api-reference/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/api-reference/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/api-reference/server/services/llm/sambanova), [Sarvam](https://docs.pipecat.ai/api-reference/server/services/llm/sarvam), [Together AI](https://docs.pipecat.ai/api-reference/server/services/llm/together) |
|
| LLMs | [Anthropic](https://docs.pipecat.ai/api-reference/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/api-reference/server/services/llm/aws), [Azure](https://docs.pipecat.ai/api-reference/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/api-reference/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/api-reference/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/api-reference/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/api-reference/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/api-reference/server/services/llm/grok), [Groq](https://docs.pipecat.ai/api-reference/server/services/llm/groq), [Inception](https://docs.pipecat.ai/api-reference/server/services/llm/inception), [Mistral](https://docs.pipecat.ai/api-reference/server/services/llm/mistral), [Nebius](https://docs.pipecat.ai/api-reference/server/services/llm/nebius), [Novita](https://docs.pipecat.ai/api-reference/server/services/llm/novita), [NVIDIA NIM](https://docs.pipecat.ai/api-reference/server/services/llm/nvidia), [Ollama](https://docs.pipecat.ai/api-reference/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/api-reference/server/services/llm/openai), [OpenAI Responses](https://docs.pipecat.ai/api-reference/server/services/llm/openai-responses), [OpenRouter](https://docs.pipecat.ai/api-reference/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/api-reference/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/api-reference/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/api-reference/server/services/llm/sambanova), [Sarvam](https://docs.pipecat.ai/api-reference/server/services/llm/sarvam), [Together AI](https://docs.pipecat.ai/api-reference/server/services/llm/together) |
|
||||||
| Text-to-Speech | [Async](https://docs.pipecat.ai/api-reference/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/api-reference/server/services/tts/aws), [Azure](https://docs.pipecat.ai/api-reference/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/api-reference/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/api-reference/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/api-reference/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/api-reference/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/api-reference/server/services/tts/fish), [Google](https://docs.pipecat.ai/api-reference/server/services/tts/google), [Gradium](https://docs.pipecat.ai/api-reference/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/api-reference/server/services/tts/groq), [Hume](https://docs.pipecat.ai/api-reference/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/api-reference/server/services/tts/inworld), [Kokoro](https://docs.pipecat.ai/api-reference/server/services/tts/kokoro), [LMNT](https://docs.pipecat.ai/api-reference/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/api-reference/server/services/tts/minimax), [Mistral](https://docs.pipecat.ai/api-reference/server/services/tts/mistral), [Neuphonic](https://docs.pipecat.ai/api-reference/server/services/tts/neuphonic), [NVIDIA](https://docs.pipecat.ai/api-reference/server/services/tts/nvidia), [OpenAI](https://docs.pipecat.ai/api-reference/server/services/tts/openai), [Piper](https://docs.pipecat.ai/api-reference/server/services/tts/piper), [Resemble](https://docs.pipecat.ai/api-reference/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/api-reference/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/api-reference/server/services/tts/sarvam), [Smallest](https://docs.pipecat.ai/api-reference/server/services/tts/smallest), 
[Soniox](https://docs.pipecat.ai/api-reference/server/services/tts/soniox), [Speechmatics](https://docs.pipecat.ai/api-reference/server/services/tts/speechmatics), [xAI](https://docs.pipecat.ai/api-reference/server/services/tts/xai), [XTTS](https://docs.pipecat.ai/api-reference/server/services/tts/xtts) |
|
| Text-to-Speech | [Async](https://docs.pipecat.ai/api-reference/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/api-reference/server/services/tts/aws), [Azure](https://docs.pipecat.ai/api-reference/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/api-reference/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/api-reference/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/api-reference/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/api-reference/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/api-reference/server/services/tts/fish), [Google](https://docs.pipecat.ai/api-reference/server/services/tts/google), [Gradium](https://docs.pipecat.ai/api-reference/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/api-reference/server/services/tts/groq), [Hume](https://docs.pipecat.ai/api-reference/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/api-reference/server/services/tts/inworld), [Kokoro](https://docs.pipecat.ai/api-reference/server/services/tts/kokoro), [LMNT](https://docs.pipecat.ai/api-reference/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/api-reference/server/services/tts/minimax), [Mistral](https://docs.pipecat.ai/api-reference/server/services/tts/mistral), [Neuphonic](https://docs.pipecat.ai/api-reference/server/services/tts/neuphonic), [NVIDIA](https://docs.pipecat.ai/api-reference/server/services/tts/nvidia), [OpenAI](https://docs.pipecat.ai/api-reference/server/services/tts/openai), [Piper](https://docs.pipecat.ai/api-reference/server/services/tts/piper), [Resemble](https://docs.pipecat.ai/api-reference/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/api-reference/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/api-reference/server/services/tts/sarvam), [Smallest](https://docs.pipecat.ai/api-reference/server/services/tts/smallest), 
[Soniox](https://docs.pipecat.ai/api-reference/server/services/tts/soniox), [Speechmatics](https://docs.pipecat.ai/api-reference/server/services/tts/speechmatics), [xAI](https://docs.pipecat.ai/api-reference/server/services/tts/xai), [XTTS](https://docs.pipecat.ai/api-reference/server/services/tts/xtts) |
|
||||||
| Speech-to-Speech    | [AWS Nova Sonic](https://docs.pipecat.ai/api-reference/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/api-reference/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/api-reference/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/api-reference/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/api-reference/server/services/s2s/ultravox) |
|
| Speech-to-Speech    | [AWS Nova Sonic](https://docs.pipecat.ai/api-reference/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/api-reference/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/api-reference/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/api-reference/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/api-reference/server/services/s2s/ultravox) |
|
||||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/api-reference/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/api-reference/server/services/transport/fastapi-websocket), [LiveKit (WebRTC)](https://docs.pipecat.ai/api-reference/server/services/transport/livekit), [SmallWebRTCTransport](https://docs.pipecat.ai/api-reference/server/services/transport/small-webrtc), [Vonage (WebRTC)](https://docs.pipecat.ai/api-reference/server/services/transport/vonage), [WebSocket Server](https://docs.pipecat.ai/api-reference/server/services/transport/websocket-server), [WhatsApp](https://docs.pipecat.ai/api-reference/server/services/transport/whatsapp), Local |
|
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/api-reference/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/api-reference/server/services/transport/fastapi-websocket), [LiveKit (WebRTC)](https://docs.pipecat.ai/api-reference/server/services/transport/livekit), [SmallWebRTCTransport](https://docs.pipecat.ai/api-reference/server/services/transport/small-webrtc), [Vonage (WebRTC)](https://docs.pipecat.ai/api-reference/server/services/transport/vonage), [WebSocket Server](https://docs.pipecat.ai/api-reference/server/services/transport/websocket-server), [WhatsApp](https://docs.pipecat.ai/api-reference/server/services/transport/whatsapp), Local |
|
||||||
|
|||||||
1
changelog/4423.added.md
Normal file
1
changelog/4423.added.md
Normal file
@@ -0,0 +1 @@
|
|||||||
|
- Added `InceptionLLMService` for Inception's Mercury 2 diffusion reasoning model, with support for `reasoning_effort` and `realtime` settings.
|
||||||
@@ -91,6 +91,9 @@ HEYGEN_LIVE_AVATAR_API_KEY=...
|
|||||||
HUME_API_KEY=...
|
HUME_API_KEY=...
|
||||||
HUME_VOICE_ID=...
|
HUME_VOICE_ID=...
|
||||||
|
|
||||||
|
# Inception
|
||||||
|
INCEPTION_API_KEY=...
|
||||||
|
|
||||||
# Inworld
|
# Inworld
|
||||||
INWORLD_API_KEY=...
|
INWORLD_API_KEY=...
|
||||||
|
|
||||||
|
|||||||
177
examples/function-calling/function-calling-inception.py
Normal file
177
examples/function-calling/function-calling-inception.py
Normal file
@@ -0,0 +1,177 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2024-2026, Daily
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: BSD 2-Clause License
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||||
|
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||||
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
|
from pipecat.frames.frames import LLMRunFrame, TTSSpeakFrame
|
||||||
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
|
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||||
|
from pipecat.processors.aggregators.llm_response_universal import (
|
||||||
|
LLMContextAggregatorPair,
|
||||||
|
LLMUserAggregatorParams,
|
||||||
|
)
|
||||||
|
from pipecat.runner.types import RunnerArguments
|
||||||
|
from pipecat.runner.utils import create_transport
|
||||||
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
|
from pipecat.services.inception.llm import InceptionLLMService
|
||||||
|
from pipecat.services.llm_service import FunctionCallParams
|
||||||
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
|
from pipecat.transports.daily.transport import DailyParams
|
||||||
|
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||||
|
|
||||||
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_weather_from_api(params: FunctionCallParams):
    """Demo weather handler: hands a fixed, hard-coded result to the callback.

    Args:
        params: Function-call parameters; only ``result_callback`` is used.
    """
    canned_weather = {"conditions": "nice", "temperature": "75"}
    await params.result_callback(canned_weather)
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_restaurant_recommendation(params: FunctionCallParams):
    """Demo restaurant handler: hands a fixed, hard-coded result to the callback.

    Args:
        params: Function-call parameters; only ``result_callback`` is used.
    """
    canned_pick = {"name": "The Golden Dragon"}
    await params.result_callback(canned_pick)
|
||||||
|
|
||||||
|
|
||||||
|
# Each entry is a zero-argument lambda, so a transport's params object is only
# constructed once that transport type has actually been chosen at runtime.
transport_params = dict(
    daily=lambda: DailyParams(audio_in_enabled=True, audio_out_enabled=True),
    twilio=lambda: FastAPIWebsocketParams(audio_in_enabled=True, audio_out_enabled=True),
    webrtc=lambda: TransportParams(audio_in_enabled=True, audio_out_enabled=True),
)
|
||||||
|
|
||||||
|
|
||||||
|
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Assemble and run the Inception function-calling voice pipeline.

    Creates Deepgram STT, Cartesia TTS and Inception LLM services, registers
    the two demo function handlers on the LLM, builds the pipeline around the
    given transport, and runs it with a ``PipelineRunner``.

    Args:
        transport: Transport whose ``input()``/``output()`` bracket the
            pipeline and whose connect/disconnect events are handled here.
        runner_args: Supplies ``pipeline_idle_timeout_secs`` for the task and
            ``handle_sigint`` for the runner.

    Raises:
        KeyError: If ``DEEPGRAM_API_KEY``, ``CARTESIA_API_KEY`` or
            ``INCEPTION_API_KEY`` is missing from the environment.
    """
    logger.info("Starting bot")

    stt = DeepgramSTTService(api_key=os.environ["DEEPGRAM_API_KEY"])

    tts = CartesiaTTSService(
        api_key=os.environ["CARTESIA_API_KEY"],
        settings=CartesiaTTSService.Settings(
            voice="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
        ),
    )

    llm = InceptionLLMService(
        api_key=os.environ["INCEPTION_API_KEY"],
        settings=InceptionLLMService.Settings(
            reasoning_effort="instant",
            system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
        ),
    )

    # You can also register a function_name of None to get all functions
    # sent to the same callback with an additional function_name parameter.
    llm.register_function("get_current_weather", fetch_weather_from_api)
    llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation)

    @llm.event_handler("on_function_calls_started")
    async def on_function_calls_started(service, function_calls):
        # Speak a short filler line as soon as function calls start.
        await tts.queue_frame(TTSSpeakFrame("Let me check on that."))

    weather_function = FunctionSchema(
        name="get_current_weather",
        description="Get the current weather",
        properties={
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            },
            "format": {
                "type": "string",
                "enum": ["celsius", "fahrenheit"],
                "description": "The temperature unit to use. Infer this from the user's location.",
            },
        },
        required=["location", "format"],
    )

    restaurant_function = FunctionSchema(
        name="get_restaurant_recommendation",
        description="Get a restaurant recommendation",
        properties={
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            },
        },
        required=["location"],
    )

    tools = ToolsSchema(standard_tools=[weather_function, restaurant_function])

    context = LLMContext(tools=tools)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
    )

    pipeline = Pipeline(
        [
            transport.input(),
            stt,
            user_aggregator,
            llm,
            tts,
            transport.output(),
            assistant_aggregator,
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Kick off the conversation.
        context.add_message(
            {"role": "developer", "content": "Please introduce yourself to the user."}
        )
        await task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        # Cancel the pipeline task once the client has gone away.
        await task.cancel()

    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

    await runner.run(task)
|
||||||
|
|
||||||
|
|
||||||
|
async def bot(runner_args: RunnerArguments):
    """Pipecat Cloud-compatible entry point: create the transport, then run the bot."""
    await run_bot(await create_transport(runner_args, transport_params), runner_args)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Delegate to the runner's `main` when this file is executed as a script.
    from pipecat.runner.run import main as run_main

    run_main()
|
||||||
@@ -77,6 +77,7 @@ groq = [ "groq>=0.23.0,<2" ]
|
|||||||
gstreamer = [ "pygobject~=3.50.0" ]
|
gstreamer = [ "pygobject~=3.50.0" ]
|
||||||
heygen = [ "livekit>=1.0.13,<2", "pipecat-ai[websockets-base]" ]
|
heygen = [ "livekit>=1.0.13,<2", "pipecat-ai[websockets-base]" ]
|
||||||
hume = [ "hume>=0.11.2,<1" ]
|
hume = [ "hume>=0.11.2,<1" ]
|
||||||
|
inception = []
|
||||||
inworld = [ "pipecat-ai[websockets-base]" ]
|
inworld = [ "pipecat-ai[websockets-base]" ]
|
||||||
koala = [ "pvkoala~=2.0.3" ]
|
koala = [ "pvkoala~=2.0.3" ]
|
||||||
kokoro = [ "kokoro-onnx>=0.5.0,<1", "requests>=2.32.5,<3" ]
|
kokoro = [ "kokoro-onnx>=0.5.0,<1", "requests>=2.32.5,<3" ]
|
||||||
|
|||||||
@@ -198,6 +198,7 @@ TESTS_FUNCTION_CALLING = [
|
|||||||
("function-calling/function-calling-sarvam.py", EVAL_WEATHER),
|
("function-calling/function-calling-sarvam.py", EVAL_WEATHER),
|
||||||
("function-calling/function-calling-novita.py", EVAL_WEATHER),
|
("function-calling/function-calling-novita.py", EVAL_WEATHER),
|
||||||
("function-calling/function-calling-deepseek.py", EVAL_WEATHER),
|
("function-calling/function-calling-deepseek.py", EVAL_WEATHER),
|
||||||
|
("function-calling/function-calling-inception.py", EVAL_WEATHER),
|
||||||
# Video
|
# Video
|
||||||
("function-calling/function-calling-anthropic-video.py", EVAL_VISION_CAMERA),
|
("function-calling/function-calling-anthropic-video.py", EVAL_VISION_CAMERA),
|
||||||
("function-calling/function-calling-aws-video.py", EVAL_VISION_CAMERA),
|
("function-calling/function-calling-aws-video.py", EVAL_VISION_CAMERA),
|
||||||
|
|||||||
0
src/pipecat/services/inception/__init__.py
Normal file
0
src/pipecat/services/inception/__init__.py
Normal file
124
src/pipecat/services/inception/llm.py
Normal file
124
src/pipecat/services/inception/llm.py
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2024-2026, Daily
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: BSD 2-Clause License
|
||||||
|
#
|
||||||
|
|
||||||
|
"""Inception LLM service implementation using OpenAI-compatible interface."""
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Literal
|
||||||
|
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
|
||||||
|
from pipecat.services.openai.base_llm import BaseOpenAILLMService
|
||||||
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
|
from pipecat.services.settings import NOT_GIVEN as _NOT_GIVEN
|
||||||
|
from pipecat.services.settings import _NotGiven, is_given
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class InceptionLLMSettings(BaseOpenAILLMService.Settings):
    """Inception-specific additions to the OpenAI-compatible LLM settings.

    Parameters:
        reasoning_effort: How much reasoning the model should apply; one of
            "instant", "low", "medium", or "high". If left unset, the
            parameter is not sent and Inception's server-side default is used.
        realtime: Set to True to reduce time to first diffusion block (TTFT).
    """

    reasoning_effort: Literal["instant", "low", "medium", "high"] | None | _NotGiven = field(
        default_factory=lambda: _NOT_GIVEN,
    )
    realtime: bool | None | _NotGiven = field(
        default_factory=lambda: _NOT_GIVEN,
    )
|
||||||
|
|
||||||
|
|
||||||
|
class InceptionLLMService(OpenAILLMService):
    """A service for interacting with Inception's API using the OpenAI-compatible interface.

    This service extends OpenAILLMService to connect to Inception's API endpoint while
    maintaining full compatibility with OpenAI's interface and functionality.
    Supports Mercury-2, Inception's diffusion-based reasoning model.
    """

    # Inception doesn't support the "developer" message role.
    supports_developer_role = False

    Settings = InceptionLLMSettings
    _settings: Settings

    def __init__(
        self,
        *,
        api_key: str,
        base_url: str = "https://api.inceptionlabs.ai/v1",
        settings: Settings | None = None,
        **kwargs,
    ):
        """Initialize the Inception LLM service.

        Args:
            api_key: The API key for accessing Inception's API.
            base_url: The base URL for Inception API. Defaults to "https://api.inceptionlabs.ai/v1".
            settings: Runtime-updatable settings. When provided, they are
                applied on top of the service defaults (model "mercury-2",
                ``reasoning_effort`` and ``realtime`` unset).
            **kwargs: Additional keyword arguments passed to OpenAILLMService.
        """
        default_settings = self.Settings(
            model="mercury-2",
            reasoning_effort=None,
            realtime=None,
        )

        if settings is not None:
            default_settings.apply_update(settings)

        super().__init__(api_key=api_key, base_url=base_url, settings=default_settings, **kwargs)

    def create_client(self, api_key=None, base_url=None, **kwargs):
        """Create OpenAI-compatible client for Inception API endpoint.

        Args:
            api_key: The API key for authentication. If None, uses instance default.
            base_url: The base URL for the API. If None, uses instance default.
            **kwargs: Additional keyword arguments for client configuration.

        Returns:
            An OpenAI-compatible client configured for Inception's API.
        """
        logger.debug(f"Creating Inception client with api {base_url}")
        return super().create_client(api_key, base_url, **kwargs)

    def build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
        """Build parameters for Inception chat completion request.

        Extends the base OpenAI parameters with Inception-specific options
        such as reasoning_effort and realtime.

        Args:
            params_from_context: Parameters, derived from the LLM context, to
                use for the chat completion. Contains messages, tools, and tool
                choice.

        Returns:
            Dictionary of parameters for the chat completion request.
        """
        params = super().build_chat_completion_params(params_from_context)

        reasoning_effort = self._settings.reasoning_effort
        if is_given(reasoning_effort) and reasoning_effort is not None:
            params["reasoning_effort"] = reasoning_effort

        # realtime is Inception-specific and unknown to the OpenAI SDK,
        # so it must be passed via extra_body to avoid validation errors.
        extra_body = {}
        realtime = self._settings.realtime
        if is_given(realtime) and realtime is not None:
            extra_body["realtime"] = realtime

        if extra_body:
            # Merge instead of overwriting: if the params built by the parent
            # already carry an `extra_body` (presumably possible via
            # caller-supplied extras -- TODO confirm against the base class),
            # its other keys are kept. The explicit settings above still win
            # when the same key appears in both.
            inherited_extra_body = params.get("extra_body") or {}
            params["extra_body"] = {**inherited_extra_body, **extra_body}

        return params
|
||||||
2
uv.lock
generated
2
uv.lock
generated
@@ -4574,7 +4574,7 @@ requires-dist = [
|
|||||||
{ name = "wait-for2", marker = "python_full_version < '3.12'", specifier = ">=0.4.1,<1" },
|
{ name = "wait-for2", marker = "python_full_version < '3.12'", specifier = ">=0.4.1,<1" },
|
||||||
{ name = "websockets", marker = "extra == 'websockets-base'", specifier = ">=13.1,<16.0" },
|
{ name = "websockets", marker = "extra == 'websockets-base'", specifier = ">=13.1,<16.0" },
|
||||||
]
|
]
|
||||||
provides-extras = ["aic", "anthropic", "assemblyai", "asyncai", "aws", "aws-nova-sonic", "azure", "cartesia", "camb", "cerebras", "daily", "deepgram", "deepseek", "elevenlabs", "fal", "fireworks", "fish", "gladia", "google", "gradium", "grok", "groq", "gstreamer", "heygen", "hume", "inworld", "koala", "kokoro", "langchain", "lemonslice", "livekit", "lmnt", "local", "local-smart-turn", "mcp", "mem0", "mistral", "mlx-whisper", "moondream", "nebius", "neuphonic", "novita", "nvidia", "openai", "rnnoise", "openrouter", "perplexity", "piper", "qwen", "resembleai", "rime", "runner", "sagemaker", "sambanova", "sarvam", "sentry", "silero", "simli", "smallest", "soniox", "soundfile", "speechmatics", "strands", "tavus", "together", "tracing", "ultravox", "vonage-video-connector", "webrtc", "websocket", "websockets-base", "whisper", "xai"]
|
provides-extras = ["aic", "anthropic", "assemblyai", "asyncai", "aws", "aws-nova-sonic", "azure", "cartesia", "camb", "cerebras", "daily", "deepgram", "deepseek", "elevenlabs", "fal", "fireworks", "fish", "gladia", "google", "gradium", "grok", "groq", "gstreamer", "heygen", "hume", "inception", "inworld", "koala", "kokoro", "langchain", "lemonslice", "livekit", "lmnt", "local", "local-smart-turn", "mcp", "mem0", "mistral", "mlx-whisper", "moondream", "nebius", "neuphonic", "novita", "nvidia", "openai", "rnnoise", "openrouter", "perplexity", "piper", "qwen", "resembleai", "rime", "runner", "sagemaker", "sambanova", "sarvam", "sentry", "silero", "simli", "smallest", "soniox", "soundfile", "speechmatics", "strands", "tavus", "together", "tracing", "ultravox", "vonage-video-connector", "webrtc", "websocket", "websockets-base", "whisper", "xai"]
|
||||||
|
|
||||||
[package.metadata.requires-dev]
|
[package.metadata.requires-dev]
|
||||||
dev = [
|
dev = [
|
||||||
|
|||||||
Reference in New Issue
Block a user