Added support for passing in a ToolsSchema in lieu of a list of provider-specific dicts when initializing OpenAIRealtimeLLMService.
I chose to go the somewhat hacky route of adding the `ToolsSchema` support into the `events.SessionProperties` model itself—even though we should never serialize that type when creating events—because the alternative seemed to be to create a new type for `OpenAIRealtimeLLMService` initialization parameters and then we'd have to contend with backward compatibility, which seemed like a bigger headache.
This commit is contained in:
@@ -9,6 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
### Added
|
||||
|
||||
- Added support for passing in a `ToolsSchema` in lieu of a list of provider-
|
||||
specific dicts when initializing `OpenAIRealtimeLLMService`.
|
||||
|
||||
- Added `TransportParams.audio_out_silence_secs`, which specifies how many
|
||||
seconds of silence to output when an `EndFrame` reaches the output
|
||||
transport. This can help ensure that all audio data is fully delivered to
|
||||
|
||||
@@ -12,6 +12,8 @@ from typing import Any, Dict, List, Literal, Optional, Union
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
|
||||
#
|
||||
# session properties
|
||||
#
|
||||
@@ -186,6 +188,9 @@ class SessionProperties(BaseModel):
|
||||
include: Additional fields to include in server outputs.
|
||||
"""
|
||||
|
||||
# Needed to support ToolsSchema in the tools field.
|
||||
model_config = ConfigDict(arbitrary_types_allowed=True)
|
||||
|
||||
type: Optional[Literal["realtime"]] = "realtime"
|
||||
object: Optional[Literal["realtime.session"]] = None
|
||||
id: Optional[str] = None
|
||||
@@ -193,7 +198,10 @@ class SessionProperties(BaseModel):
|
||||
output_modalities: Optional[List[Literal["text", "audio"]]] = None
|
||||
instructions: Optional[str] = None
|
||||
audio: Optional[AudioConfiguration] = None
|
||||
tools: Optional[List[Dict]] = None
|
||||
# Tools can only be ToolsSchema when provided by user in
|
||||
# OpenAIRealtimeLLMService constructor. We'll never serialize/deserialize
|
||||
# ToolsSchema.
|
||||
tools: Optional[ToolsSchema | List[Dict]] = None
|
||||
tool_choice: Optional[Literal["auto", "none", "required"]] = None
|
||||
max_output_tokens: Optional[Union[int, Literal["inf"]]] = None
|
||||
tracing: Optional[Union[Literal["auto"], Dict]] = None
|
||||
|
||||
@@ -14,6 +14,7 @@ from typing import Optional
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.adapters.services.open_ai_realtime_adapter import (
|
||||
OpenAIRealtimeLLMAdapter,
|
||||
)
|
||||
@@ -155,6 +156,16 @@ class OpenAIRealtimeLLMService(LLMService):
|
||||
self._session_properties: events.SessionProperties = (
|
||||
session_properties or events.SessionProperties()
|
||||
)
|
||||
# If needed, map session_properties.tools from ToolsSchema to list of
|
||||
# dicts, which the remote server expects
|
||||
if self._session_properties.tools and isinstance(
|
||||
self._session_properties.tools, ToolsSchema
|
||||
):
|
||||
adapter = self.get_llm_adapter()
|
||||
self._session_properties.tools = adapter.from_standard_tools(
|
||||
self._session_properties.tools
|
||||
)
|
||||
|
||||
self._audio_input_paused = start_audio_paused
|
||||
self._websocket = None
|
||||
self._receive_task = None
|
||||
|
||||
Reference in New Issue
Block a user