Added support for passing in a ToolsSchema in lieu of a list of provider-specific dicts when initializing OpenAIRealtimeLLMService.

I chose to go the somewhat hacky route of adding the `ToolsSchema` support into the `events.SessionProperties` model itself—even though we should never serialize that type when creating events—because the alternative seemed to be to create a new type for `OpenAIRealtimeLLMService` initialization parameters and then we'd have to contend with backward compatibility, which seemed like a bigger headache.
This commit is contained in:
Paul Kompfner
2025-11-07 09:50:26 -05:00
parent 613ad74103
commit 925a6cc2ef
3 changed files with 23 additions and 1 deletions

View File

@@ -9,6 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Added support for passing in a `ToolsSchema` in lieu of a list of provider-
specific dicts when initializing `OpenAIRealtimeLLMService`.
- Added `TransportParams.audio_out_silence_secs`, which specifies how many
seconds of silence to output when an `EndFrame` reaches the output
transport. This can help ensure that all audio data is fully delivered to

View File

@@ -12,6 +12,8 @@ from typing import Any, Dict, List, Literal, Optional, Union
from pydantic import BaseModel, ConfigDict, Field
from pipecat.adapters.schemas.tools_schema import ToolsSchema
#
# session properties
#
@@ -186,6 +188,9 @@ class SessionProperties(BaseModel):
include: Additional fields to include in server outputs.
"""
# Needed to support ToolsSchema in the tools field.
model_config = ConfigDict(arbitrary_types_allowed=True)
type: Optional[Literal["realtime"]] = "realtime"
object: Optional[Literal["realtime.session"]] = None
id: Optional[str] = None
@@ -193,7 +198,10 @@ class SessionProperties(BaseModel):
output_modalities: Optional[List[Literal["text", "audio"]]] = None
instructions: Optional[str] = None
audio: Optional[AudioConfiguration] = None
tools: Optional[List[Dict]] = None
# Tools can only be ToolsSchema when provided by user in
# OpenAIRealtimeLLMService constructor. We'll never serialize/deserialize
# ToolsSchema.
tools: Optional[ToolsSchema | List[Dict]] = None
tool_choice: Optional[Literal["auto", "none", "required"]] = None
max_output_tokens: Optional[Union[int, Literal["inf"]]] = None
tracing: Optional[Union[Literal["auto"], Dict]] = None

View File

@@ -14,6 +14,7 @@ from typing import Optional
from loguru import logger
from pipecat.adapters.schemas.tools_schema import ToolsSchema
from pipecat.adapters.services.open_ai_realtime_adapter import (
OpenAIRealtimeLLMAdapter,
)
@@ -155,6 +156,16 @@ class OpenAIRealtimeLLMService(LLMService):
self._session_properties: events.SessionProperties = (
session_properties or events.SessionProperties()
)
# If needed, map session_properties.tools from a ToolsSchema to a list of
# dicts, which the remote server expects
if self._session_properties.tools and isinstance(
self._session_properties.tools, ToolsSchema
):
adapter = self.get_llm_adapter()
self._session_properties.tools = adapter.from_standard_tools(
self._session_properties.tools
)
self._audio_input_paused = start_audio_paused
self._websocket = None
self._receive_task = None