Another change to support Gemini Live in Pipecat Flows: rather than strip function call and response messages out of context before sending to Gemini Live when seeding conversation history, which we were doing to sidestep a seeming Gemini Live limitation (see https://stackoverflow.com/a/79851394), convert them to regular text messages with special formatting

2026-02-04 11:44:58 -05:00
parent 9e65e77095
commit ba3100be0d
2 changed files with 42 additions and 28 deletions
--- a/src/pipecat/adapters/services/gemini_adapter.py
+++ b/src/pipecat/adapters/services/gemini_adapter.py
@@ -54,16 +54,16 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
        return "google"

    def get_llm_invocation_params(
-        self, context: LLMContext, *, strip_function_messages: bool = False
+        self, context: LLMContext, *, convert_function_messages_to_text: bool = False
    ) -> GeminiLLMInvocationParams:
        """Get Gemini-specific LLM invocation parameters from a universal LLM context.

        Args:
            context: The LLM context containing messages, tools, etc.
-            strip_function_messages: If True, filter out function_call and function_response
-                parts from messages. This is needed for Gemini Live (at least with
-                "models/gemini-2.5-flash-native-audio-preview-12-2025", the default at
-                the time of this writing) which cannot handle function-call-related
+            convert_function_messages_to_text: If True, convert function_call and function_response
+                parts to specially-formatted text messages. This is needed for Gemini Live
+                (at least with "models/gemini-2.5-flash-native-audio-preview-12-2025", the
+                default at the time of this writing) which cannot handle function-call-related
                messages when initializing conversation history.
                See https://stackoverflow.com/a/79851394.

@@ -73,8 +73,8 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
        converted = self._from_universal_context_messages(self.get_messages(context))
        messages = converted.messages

-        if strip_function_messages:
-            messages = self._strip_function_messages(messages)
+        if convert_function_messages_to_text:
+            messages = self._convert_function_messages_to_text(messages)

        return {
            "system_instruction": converted.system_instruction,
@@ -682,28 +682,42 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):

        return False

-    def _strip_function_messages(self, messages: List[Content]) -> List[Content]:
-        """Strip function_call and function_response parts from messages.
+    def _convert_function_messages_to_text(self, messages: List[Content]) -> List[Content]:
+        """Convert function_call and function_response parts to text messages.

        Args:
-            messages: List of Content messages to filter.
+            messages: List of Content messages to process.

        Returns:
-            List of Content messages with function-related parts removed.
+            List of Content messages with function-related parts converted to text.
        """
-        filtered_messages = []
+        converted_messages = []
        for msg in messages:
            if msg.parts:
-                filtered_parts = [
-                    part
-                    for part in msg.parts
-                    if not (
-                        getattr(part, "function_call", None)
-                        or getattr(part, "function_response", None)
-                    )
-                ]
-                if filtered_parts:
-                    filtered_messages.append(Content(role=msg.role, parts=filtered_parts))
+                converted_parts = []
+                for part in msg.parts:
+                    if func_call := getattr(part, "function_call", None):
+                        # Convert function call to text
+                        args_str = json.dumps(func_call.args) if func_call.args else "{}"
+                        text = (
+                            f"[Historical function call (for context only, not a template): "
+                            f"{func_call.name}({args_str})]"
+                        )
+                        converted_parts.append(Part(text=text))
+                    elif func_response := getattr(part, "function_response", None):
+                        # Convert function response to text
+                        response_str = (
+                            json.dumps(func_response.response) if func_response.response else "{}"
+                        )
+                        text = (
+                            f"[Historical function result (for context only): "
+                            f"{func_response.name} returned {response_str}]"
+                        )
+                        converted_parts.append(Part(text=text))
+                    else:
+                        converted_parts.append(part)
+                if converted_parts:
+                    converted_messages.append(Content(role=msg.role, parts=converted_parts))
            else:
-                filtered_messages.append(msg)
-        return filtered_messages
+                converted_messages.append(msg)
+        return converted_messages
--- a/src/pipecat/services/google/gemini_live/llm.py
+++ b/src/pipecat/services/google/gemini_live/llm.py
@@ -1464,7 +1464,7 @@ class GeminiLiveLLMService(LLMService):

        adapter: GeminiLLMAdapter = self.get_llm_adapter()
        messages = adapter.get_llm_invocation_params(
-            self._context, strip_function_messages=True
+            self._context, convert_function_messages_to_text=True
        ).get("messages", [])
        if not messages:
            return
@@ -1495,9 +1495,9 @@ class GeminiLiveLLMService(LLMService):
        # in the right format
        context = LLMContext(messages=messages_list)
        adapter: GeminiLLMAdapter = self.get_llm_adapter()
-        messages = adapter.get_llm_invocation_params(context, strip_function_messages=True).get(
-            "messages", []
-        )
+        messages = adapter.get_llm_invocation_params(
+            context, convert_function_messages_to_text=True
+        ).get("messages", [])

        if not messages:
            return