diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ec09c493..46be71bb8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added support for passing in a `ToolsSchema` in lieu of a list of provider- - specific dicts when initializing `OpenAIRealtimeLLMService`. + specific dicts when initializing `OpenAIRealtimeLLMService` or when updating + it using `LLMUpdateSettingsFrame`. - Added `TransportParams.audio_out_silence_secs`, which specifies how many seconds of silence to output when an `EndFrame` reaches the output diff --git a/examples/foundational/19-openai-realtime.py b/examples/foundational/19-openai-realtime.py index b5edc0ff2..31af47bf9 100644 --- a/examples/foundational/19-openai-realtime.py +++ b/examples/foundational/19-openai-realtime.py @@ -14,8 +14,14 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema +from pipecat.adapters.services.open_ai_realtime_adapter import OpenAIRealtimeLLMAdapter from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.frames.frames import LLMRunFrame, LLMSetToolsFrame, TranscriptionMessage +from pipecat.frames.frames import ( + LLMRunFrame, + LLMSetToolsFrame, + LLMUpdateSettingsFrame, + TranscriptionMessage, +) from pipecat.observers.loggers.transcription_log_observer import TranscriptionLogObserver from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner @@ -148,6 +154,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): noise_reduction=InputAudioNoiseReduction(type="near_field"), ) ), + # In this example we provide tools through the context, but you could + # alternatively provide them here. # tools=tools, instructions="""You are a helpful and friendly AI. @@ -223,6 +231,10 @@ Remember, your responses should be short. 
Just one or two sentences, usually. Re standard_tools=[weather_function, restaurant_function, get_news_function] ) await task.queue_frames([LLMSetToolsFrame(tools=new_tools)]) + # Alternative pattern, useful if you're changing other session properties too + # await task.queue_frames( + # [LLMUpdateSettingsFrame(settings=SessionProperties(tools=new_tools).model_dump())] + # ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/src/pipecat/services/openai/realtime/events.py b/src/pipecat/services/openai/realtime/events.py index 42b5561d6..e1a69b43c 100644 --- a/src/pipecat/services/openai/realtime/events.py +++ b/src/pipecat/services/openai/realtime/events.py @@ -198,9 +198,9 @@ class SessionProperties(BaseModel): output_modalities: Optional[List[Literal["text", "audio"]]] = None instructions: Optional[str] = None audio: Optional[AudioConfiguration] = None - # Tools can only be ToolsSchema when provided by user in - # OpenAIRealtimeLLMService constructor. We'll never serialize/deserialize - # ToolsSchema. + # Tools can only be ToolsSchema when provided by the user, in either the + # OpenAIRealtimeLLMService constructor or through LLMUpdateSettingsFrame. + # We'll never serialize/deserialize ToolsSchema when talking to the server. 
tools: Optional[ToolsSchema | List[Dict]] = None tool_choice: Optional[Literal["auto", "none", "required"]] = None max_output_tokens: Optional[Union[int, Literal["inf"]]] = None diff --git a/src/pipecat/services/openai/realtime/llm.py b/src/pipecat/services/openai/realtime/llm.py index 77e326eb8..0129a94f9 100644 --- a/src/pipecat/services/openai/realtime/llm.py +++ b/src/pipecat/services/openai/realtime/llm.py @@ -156,16 +156,6 @@ class OpenAIRealtimeLLMService(LLMService): self._session_properties: events.SessionProperties = ( session_properties or events.SessionProperties() ) - # If needed, map session_properties.tools from ToolsSchema to list of - # dicts, which remote server expects - if self._session_properties.tools and isinstance( - self._session_properties.tools, ToolsSchema - ): - adapter = self.get_llm_adapter() - self._session_properties.tools = adapter.from_standard_tools( - self._session_properties.tools - ) - self._audio_input_paused = start_audio_paused self._websocket = None self._receive_task = None @@ -492,9 +482,9 @@ class OpenAIRealtimeLLMService(LLMService): async def _update_settings(self): settings = self._session_properties + adapter: OpenAIRealtimeLLMAdapter = self.get_llm_adapter() if self._context: - adapter: OpenAIRealtimeLLMAdapter = self.get_llm_adapter() llm_invocation_params = adapter.get_llm_invocation_params(self._context) # tools given in the context override the tools in the session properties @@ -506,6 +496,12 @@ class OpenAIRealtimeLLMService(LLMService): if llm_invocation_params["system_instruction"]: settings.instructions = llm_invocation_params["system_instruction"] + # If needed, map settings.tools from ToolsSchema to list of dicts, + # which remote server expects. It would only be a ToolsSchema if that's + # how it was provided in the constructor or via LLMUpdateSettingsFrame. 
+ if settings.tools and isinstance(settings.tools, ToolsSchema): + settings.tools = adapter.from_standard_tools(settings.tools) + await self.send_client_event(events.SessionUpdateEvent(session=settings)) #