From eda12f56e618ff54d6b8ade03f3ee3bbe28bf2cc Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Thu, 6 Nov 2025 19:42:35 -0500 Subject: [PATCH] Add clarifying documentation about OpenAI Realtime model use --- src/pipecat/services/openai/realtime/events.py | 4 ++++ src/pipecat/services/openai/realtime/llm.py | 8 +++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/pipecat/services/openai/realtime/events.py b/src/pipecat/services/openai/realtime/events.py index 200e59d68..f52298ab3 100644 --- a/src/pipecat/services/openai/realtime/events.py +++ b/src/pipecat/services/openai/realtime/events.py @@ -170,6 +170,10 @@ class SessionProperties(BaseModel): object: Object type identifier, always "realtime.session". id: Unique identifier for the session. model: The Realtime model used for this session. + Note: The model is set at connection time via the WebSocket URL query + parameter and cannot be changed during the session. This field is populated + by the server in session.created and session.updated responses, but is effectively + a no-op for session.update requests. output_modalities: The set of modalities the model can respond with. instructions: System instructions for the assistant. audio: Configuration for input and output audio. diff --git a/src/pipecat/services/openai/realtime/llm.py b/src/pipecat/services/openai/realtime/llm.py index 00a0322a8..6d3604541 100644 --- a/src/pipecat/services/openai/realtime/llm.py +++ b/src/pipecat/services/openai/realtime/llm.py @@ -114,10 +114,13 @@ class OpenAIRealtimeLLMService(LLMService): Args: api_key: OpenAI API key for authentication. model: OpenAI model name. Defaults to "gpt-realtime". + This is a connection-level parameter set via the WebSocket URL query + parameter and cannot be changed during the session. base_url: WebSocket base URL for the realtime API. Defaults to "wss://api.openai.com/v1/realtime". session_properties: Configuration properties for the realtime session. - If None, uses default SessionProperties. + These are session-level settings that can be updated during the session + (except for voice and model). If None, uses default SessionProperties. start_audio_paused: Whether to start with audio input paused. Defaults to False. send_transcription_frames: Whether to emit transcription frames. @@ -139,6 +142,8 @@ class OpenAIRealtimeLLMService(LLMService): stacklevel=2, ) + # Build WebSocket URL with model query parameter + # Source: https://platform.openai.com/docs/guides/realtime-websocket full_url = f"{base_url}?model={model}" super().__init__(base_url=full_url, **kwargs) @@ -146,6 +151,7 @@ class OpenAIRealtimeLLMService(LLMService): self.base_url = full_url self.set_model_name(model) + # Initialize session_properties self._session_properties: events.SessionProperties = ( session_properties or events.SessionProperties() )