Added support for passing in a ToolsSchema in lieu of a list of provider-specific dicts when initializing OpenAIRealtimeLLMService.

I chose to go the somewhat hacky route of adding the `ToolsSchema` support into the `events.SessionProperties` model itself—even though we should never serialize that type when creating events—because the alternative seemed to be to create a new type for `OpenAIRealtimeLLMService` initialization parameters and then we'd have to contend with backward compatibility, which seemed like a bigger headache.
2025-11-07 09:50:26 -05:00
parent 613ad74103
commit 925a6cc2ef
3 changed files with 23 additions and 1 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Added

+- Added support for passing in a `ToolsSchema` in lieu of a list of provider-
+  specific dicts when initializing `OpenAIRealtimeLLMService`.
+
 - Added `TransportParams.audio_out_silence_secs`, which specifies how many
  seconds of silence to output when an `EndFrame` reaches the output
  transport. This can help ensure that all audio data is fully delivered to
--- a/src/pipecat/services/openai/realtime/events.py
+++ b/src/pipecat/services/openai/realtime/events.py
@@ -12,6 +12,8 @@ from typing import Any, Dict, List, Literal, Optional, Union

 from pydantic import BaseModel, ConfigDict, Field

+from pipecat.adapters.schemas.tools_schema import ToolsSchema
+
 #
 # session properties
 #
@@ -186,6 +188,9 @@ class SessionProperties(BaseModel):
        include: Additional fields to include in server outputs.
    """

+    # Needed to support ToolsSchema in the tools field.
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
    type: Optional[Literal["realtime"]] = "realtime"
    object: Optional[Literal["realtime.session"]] = None
    id: Optional[str] = None
@@ -193,7 +198,10 @@ class SessionProperties(BaseModel):
    output_modalities: Optional[List[Literal["text", "audio"]]] = None
    instructions: Optional[str] = None
    audio: Optional[AudioConfiguration] = None
-    tools: Optional[List[Dict]] = None
+    # Tools can only be ToolsSchema when provided by user in
+    # OpenAIRealtimeLLMService constructor. We'll never serialize/deserialize
+    # ToolsSchema.
+    tools: Optional[ToolsSchema | List[Dict]] = None
    tool_choice: Optional[Literal["auto", "none", "required"]] = None
    max_output_tokens: Optional[Union[int, Literal["inf"]]] = None
    tracing: Optional[Union[Literal["auto"], Dict]] = None
--- a/src/pipecat/services/openai/realtime/llm.py
+++ b/src/pipecat/services/openai/realtime/llm.py
@@ -14,6 +14,7 @@ from typing import Optional

 from loguru import logger

+from pipecat.adapters.schemas.tools_schema import ToolsSchema
 from pipecat.adapters.services.open_ai_realtime_adapter import (
    OpenAIRealtimeLLMAdapter,
 )
@@ -155,6 +156,16 @@ class OpenAIRealtimeLLMService(LLMService):
        self._session_properties: events.SessionProperties = (
            session_properties or events.SessionProperties()
        )
+        # If needed, map session_properties.tools from ToolsSchema to list of
+        # dicts, which remote server expects
+        if self._session_properties.tools and isinstance(
+            self._session_properties.tools, ToolsSchema
+        ):
+            adapter = self.get_llm_adapter()
+            self._session_properties.tools = adapter.from_standard_tools(
+                self._session_properties.tools
+            )
+
        self._audio_input_paused = start_audio_paused
        self._websocket = None
        self._receive_task = None