Improve SessionProperties initialization: remove the voice argument from GrokRealtimeLLMService (set it via session_properties instead) and default turn_detection to server-side VAD

This commit is contained in:
Mark Backman
2025-12-20 08:02:48 -05:00
parent 0576783c5e
commit 348fa5a719
3 changed files with 16 additions and 23 deletions

View File

@@ -52,7 +52,6 @@ from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.services.grok.realtime.events import (
SessionProperties,
TurnDetection,
WebSearchTool,
XSearchTool,
)
@@ -172,11 +171,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
# Configure Grok session properties
session_properties = SessionProperties(
# Voice options: Ara (warm, friendly), Rex (confident), Sal (smooth),
# Eve (energetic), Leo (authoritative)
# Voice options: Ara, Rex, Sal, Eve, Leo
voice="Ara",
# Enable server-side VAD for automatic turn detection
turn_detection=TurnDetection(type="server_vad"),
# System instructions
instructions="""You are a helpful and friendly AI assistant powered by Grok.

View File

@@ -205,7 +205,9 @@ class SessionProperties(BaseModel):
Parameters:
instructions: System instructions for the assistant.
voice: The voice the model uses to respond. Options: Ara, Rex, Sal, Eve, Leo.
turn_detection: Configuration for turn detection, or None for manual.
Defaults to "Ara".
turn_detection: Configuration for turn detection. Defaults to server-side VAD.
Set to None for manual turn detection.
audio: Configuration for input and output audio.
tools: Available tools for the assistant (web_search, x_search, file_search, function).
"""
@@ -215,7 +217,9 @@ class SessionProperties(BaseModel):
instructions: Optional[str] = None
voice: Optional[GrokVoice] = "Ara"
turn_detection: Optional[TurnDetection] = None
turn_detection: Optional[TurnDetection] = Field(
default_factory=lambda: TurnDetection(type="server_vad")
)
audio: Optional[AudioConfiguration] = None
# Tools can be ToolsSchema when provided by user, or list of dicts for API
tools: Optional[ToolsSchema | List[GrokTool]] = None

View File

@@ -107,7 +107,6 @@ class GrokRealtimeLLMService(LLMService):
self,
*,
api_key: str,
voice: events.GrokVoice = "Ara",
base_url: str = "wss://api.x.ai/v1/realtime",
session_properties: Optional[events.SessionProperties] = None,
start_audio_paused: bool = False,
@@ -117,12 +116,15 @@ class GrokRealtimeLLMService(LLMService):
Args:
api_key: xAI API key for authentication.
voice: Voice to use for responses. Options: Ara, Rex, Sal, Eve, Leo.
Defaults to "Ara".
base_url: WebSocket base URL for the realtime API.
Defaults to "wss://api.x.ai/v1/realtime".
session_properties: Configuration properties for the realtime session.
If None, uses default SessionProperties with the specified voice.
If None, uses default SessionProperties with voice "Ara".
To set a different voice, configure it in session_properties:
session_properties = events.SessionProperties(voice="Rex")
Available voices: Ara, Rex, Sal, Eve, Leo.
start_audio_paused: Whether to start with audio input paused. Defaults to False.
**kwargs: Additional arguments passed to parent LLMService.
"""
@@ -130,20 +132,11 @@ class GrokRealtimeLLMService(LLMService):
self.api_key = api_key
self.base_url = base_url
self._voice = voice
# Initialize session_properties
if session_properties:
self._session_properties = session_properties
# Ensure voice is set
if not self._session_properties.voice:
self._session_properties.voice = voice
else:
self._session_properties = events.SessionProperties(
voice=voice,
turn_detection=events.TurnDetection(type="server_vad"),
# Audio config will be set in start() based on PipelineParams
)
self._session_properties: events.SessionProperties = (
session_properties or events.SessionProperties()
)
self._audio_input_paused = start_audio_paused
self._websocket = None