From ba3100be0d02fa77e8900cbfbc461aeab198fb57 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 4 Feb 2026 11:44:58 -0500 Subject: [PATCH] Another change to support Gemini Live in Pipecat Flows: rather than strip function call and response messages out of context before sending to Gemini Live when seeding conversation history, which we were doing to sidestep a seeming Gemini Live limitation (see https://stackoverflow.com/a/79851394), convert them to regular text messages with special formatting --- .../adapters/services/gemini_adapter.py | 62 ++++++++++++------- .../services/google/gemini_live/llm.py | 8 +-- 2 files changed, 42 insertions(+), 28 deletions(-) diff --git a/src/pipecat/adapters/services/gemini_adapter.py b/src/pipecat/adapters/services/gemini_adapter.py index 7790299c7..86fee1a58 100644 --- a/src/pipecat/adapters/services/gemini_adapter.py +++ b/src/pipecat/adapters/services/gemini_adapter.py @@ -54,16 +54,16 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]): return "google" def get_llm_invocation_params( - self, context: LLMContext, *, strip_function_messages: bool = False + self, context: LLMContext, *, convert_function_messages_to_text: bool = False ) -> GeminiLLMInvocationParams: """Get Gemini-specific LLM invocation parameters from a universal LLM context. Args: context: The LLM context containing messages, tools, etc. - strip_function_messages: If True, filter out function_call and function_response - parts from messages. This is needed for Gemini Live (at least with - "models/gemini-2.5-flash-native-audio-preview-12-2025", the default at - the time of this writing) which cannot handle function-call-related + convert_function_messages_to_text: If True, convert function_call and function_response + parts to specially-formatted text messages. This is needed for Gemini Live + (at least with "models/gemini-2.5-flash-native-audio-preview-12-2025", the + default at the time of this writing) which cannot handle function-call-related messages when initializing conversation history. See https://stackoverflow.com/a/79851394. @@ -73,8 +73,8 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]): converted = self._from_universal_context_messages(self.get_messages(context)) messages = converted.messages - if strip_function_messages: - messages = self._strip_function_messages(messages) + if convert_function_messages_to_text: + messages = self._convert_function_messages_to_text(messages) return { "system_instruction": converted.system_instruction, @@ -682,28 +682,42 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]): return False - def _strip_function_messages(self, messages: List[Content]) -> List[Content]: - """Strip function_call and function_response parts from messages. + def _convert_function_messages_to_text(self, messages: List[Content]) -> List[Content]: + """Convert function_call and function_response parts to text messages. Args: - messages: List of Content messages to filter. + messages: List of Content messages to process. Returns: - List of Content messages with function-related parts removed. + List of Content messages with function-related parts converted to text. """ - filtered_messages = [] + converted_messages = [] for msg in messages: if msg.parts: - filtered_parts = [ - part - for part in msg.parts - if not ( - getattr(part, "function_call", None) - or getattr(part, "function_response", None) - ) - ] - if filtered_parts: - filtered_messages.append(Content(role=msg.role, parts=filtered_parts)) + converted_parts = [] + for part in msg.parts: + if func_call := getattr(part, "function_call", None): + # Convert function call to text + args_str = json.dumps(func_call.args) if func_call.args else "{}" + text = ( + f"[Historical function call (for context only, not a template): " + f"{func_call.name}({args_str})]" + ) + converted_parts.append(Part(text=text)) + elif func_response := getattr(part, "function_response", None): + # Convert function response to text + response_str = ( + json.dumps(func_response.response) if func_response.response else "{}" + ) + text = ( + f"[Historical function result (for context only): " + f"{func_response.name} returned {response_str}]" + ) + converted_parts.append(Part(text=text)) + else: + converted_parts.append(part) + if converted_parts: + converted_messages.append(Content(role=msg.role, parts=converted_parts)) else: - filtered_messages.append(msg) - return filtered_messages + converted_messages.append(msg) + return converted_messages diff --git a/src/pipecat/services/google/gemini_live/llm.py b/src/pipecat/services/google/gemini_live/llm.py index 5c2871fdc..ea60945c5 100644 --- a/src/pipecat/services/google/gemini_live/llm.py +++ b/src/pipecat/services/google/gemini_live/llm.py @@ -1464,7 +1464,7 @@ class GeminiLiveLLMService(LLMService): adapter: GeminiLLMAdapter = self.get_llm_adapter() messages = adapter.get_llm_invocation_params( - self._context, strip_function_messages=True + self._context, convert_function_messages_to_text=True ).get("messages", []) if not messages: return @@ -1495,9 +1495,9 @@ class GeminiLiveLLMService(LLMService): # in the right format context = LLMContext(messages=messages_list) adapter: GeminiLLMAdapter = self.get_llm_adapter() - messages = adapter.get_llm_invocation_params(context, strip_function_messages=True).get( - "messages", [] - ) + messages = adapter.get_llm_invocation_params( + context, convert_function_messages_to_text=True + ).get("messages", []) if not messages: return