Add support for Gemini 3 Pro non-function-call-related thought signatures

2025-12-04 14:29:08 -05:00
parent 0cdf0c4504
commit c8c6f424cd
6 changed files with 115 additions and 3 deletions
--- a/examples/foundational/49-thinking-functions.py
+++ b/examples/foundational/49-thinking-functions.py
@@ -108,8 +108,10 @@ async def run_bot(
    elif llm_provider == LLM_GOOGLE:
        llm = GoogleLLMService(
            api_key=os.getenv("GOOGLE_API_KEY"),
+            # model="gemini-3-pro-preview", # A more powerful reasoning model, but slower
            params=GoogleLLMService.InputParams(
                thinking=GoogleLLMService.ThinkingConfig(
+                    # thinking_level="low", # Use this field instead of thinking_budget for Gemini 3 Pro. Defaults to "high".
                    thinking_budget=-1,  # Dynamic thinking
                    include_thoughts=True,
                )
--- a/examples/foundational/49-thinking.py
+++ b/examples/foundational/49-thinking.py
@@ -87,8 +87,10 @@ async def run_bot(
    elif llm_provider == LLM_GOOGLE:
        llm = GoogleLLMService(
            api_key=os.getenv("GOOGLE_API_KEY"),
+            # model="gemini-3-pro-preview", # A more powerful reasoning model, but slower
            params=GoogleLLMService.InputParams(
                thinking=GoogleLLMService.ThinkingConfig(
+                    # thinking_level="low", # Use this field instead of thinking_budget for Gemini 3 Pro. Defaults to "high".
                    thinking_budget=-1,  # Dynamic thinking
                    include_thoughts=True,
                )
--- a/src/pipecat/adapters/services/gemini_adapter.py
+++ b/src/pipecat/adapters/services/gemini_adapter.py
@@ -210,6 +210,7 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
        system_instruction = None
        messages = []
        tool_call_id_to_name_mapping = {}
+        non_fn_thought_signatures = []

        # Process each message, preserving Google-formatted messages and converting others
        for message in universal_context_messages:
@@ -234,6 +235,17 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
                )
                continue

+            # If we found a standalone non-function-call-related thought
+            # signature (Gemini 3 Pro), store it to apply later to the
+            # corresponding assistant message
+            if (
+                isinstance(result.content, dict)
+                and result.content.get("type") == "thought_signature"
+                and (thought_signature := result.content.get("signature"))
+            ):
+                non_fn_thought_signatures.append(thought_signature)
+                continue
+
            # Each result is either a Content or a system instruction
            if result.content:
                messages.append(result.content)
@@ -244,6 +256,10 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
            if result.tool_call_id_to_name_mapping:
                tool_call_id_to_name_mapping.update(result.tool_call_id_to_name_mapping)

+        # Apply non-function-call-related thought signatures to the appropriate
+        # messages
+        self._apply_non_function_thought_signatures_to_messages(non_fn_thought_signatures, messages)
+
        # Check if we only have function-related messages (no regular text)
        has_regular_messages = any(
            len(msg.parts) == 1
@@ -434,7 +450,7 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
        Args:
            thought_signature: The thought signature bytes to apply.
            tool_call_id: ID of the tool call message to find and modify.
-            messages: List of Content messages to search through.
+            messages: List of messages to search through.
        """
        # Search backwards through messages to find the matching function call
        for message in reversed(messages):
@@ -454,3 +470,46 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
                continue
            # Break outer loop if inner loop broke (found match)
            break
+
+    def _apply_non_function_thought_signatures_to_messages(
+        self, thought_signatures: List[bytes], messages: List[Content]
+    ) -> None:
+        """Attach standalone thought signatures to plain assistant messages.
+
+        Gemini 3 Pro can emit a thought signature on the final part of a
+        response that contains no function call. The candidates here are the
+        `Content` messages with role "model", at least one part, and no part
+        carrying a function call. Signatures and candidates are paired in
+        list order (oldest first); the signature is written, in place, to the
+        last part of its candidate. Surplus items on either side are left
+        unpaired, and a warning is logged when the two counts differ.
+
+        Args:
+            thought_signatures: Thought signature bytes, oldest first.
+            messages: Messages to scan; matching ones are mutated in place.
+        """
+        if not thought_signatures:
+            return
+
+        def is_plain_model_message(candidate) -> bool:
+            # Only non-empty model-role Content without function calls qualifies.
+            if not isinstance(candidate, Content) or not candidate.parts:
+                return False
+            if candidate.role != "model":
+                return False
+            return not any(getattr(p, "function_call", None) for p in candidate.parts)
+
+        # Collect candidates in their original (oldest-to-newest) order.
+        targets = [m for m in messages if is_plain_model_message(m)]
+
+        # Report a count mismatch, then pair as many as possible.
+        signature_count, target_count = len(thought_signatures), len(targets)
+        if signature_count != target_count:
+            logger.warning(
+                f"Thought signature count ({signature_count}) doesn't match "
+                f"non-function-call assistant message count ({target_count})"
+            )
+
+        # zip() stops at the shorter list, so unmatched extras are skipped.
+        for target, signature in zip(targets, thought_signatures):
+            target.parts[-1].thought_signature = signature
--- a/src/pipecat/frames/frames.py
+++ b/src/pipecat/frames/frames.py
@@ -594,7 +594,8 @@ class LLMThoughtStartFrame(ControlFrame):
            If it is appended, the `llm` field is required, since it will be
            appended as an `LLMSpecificMessage`.
        llm: Optional identifier of the LLM provider for LLM-specific handling.
-            Only required if `append_to_context` is True.
+            Only required if `append_to_context` is True, as the thought is
+            appended to context as an `LLMSpecificMessage`.
    """

    append_to_context: bool = False
@@ -642,7 +643,7 @@ class LLMThoughtEndFrame(ControlFrame):

    Parameters:
        thought_metadata: Optional metadata associated with the thought,
-            e.g. thought signature.
+            e.g. an Anthropic thought signature.
    """

    thought_metadata: Optional[Dict[str, Any]] = None
@@ -652,6 +653,28 @@ class LLMThoughtEndFrame(ControlFrame):
        return f"{self.name}(pts: {pts}, metadata: {self.thought_metadata})"


+@dataclass
+class LLMThoughtSignatureFrame(DataFrame):
+    """Frame carrying a standalone LLM thought signature.
+
+    "Standalone" means the signature is not associated with a thought. Gemini 3
+    Pro, for example, can output a signature at the end of a response.
+
+    Parameters:
+        llm: Identifier of the LLM provider for LLM-specific handling.
+            Required because the thought signature is appended to context as
+            an `LLMSpecificMessage`.
+        signature: The thought signature data.
+    """
+
+    llm: str
+    signature: Any
+
+    def __str__(self):
+        formatted_pts = format_pts(self.pts)
+        return f"{self.name}(pts: {formatted_pts}, signature: {self.signature})"
+
+
@dataclass
 class LLMMessagesFrame(DataFrame):
    """Frame containing LLM messages for chat completion.
--- a/src/pipecat/processors/aggregators/llm_response_universal.py
+++ b/src/pipecat/processors/aggregators/llm_response_universal.py
@@ -48,6 +48,7 @@ from pipecat.frames.frames import (
    LLMSetToolChoiceFrame,
    LLMSetToolsFrame,
    LLMThoughtEndFrame,
+    LLMThoughtSignatureFrame,
    LLMThoughtStartFrame,
    LLMThoughtTextFrame,
    SpeechControlParamsFrame,
@@ -643,6 +644,8 @@ class LLMAssistantAggregator(LLMContextAggregator):
            await self._handle_thought_text(frame)
        elif isinstance(frame, LLMThoughtEndFrame):
            await self._handle_thought_end(frame)
+        elif isinstance(frame, LLMThoughtSignatureFrame):
+            await self._handle_standalone_thought_signature(frame)
        elif isinstance(frame, LLMRunFrame):
            await self._handle_llm_run(frame)
        elif isinstance(frame, LLMMessagesAppendFrame):
@@ -907,6 +910,17 @@ class LLMAssistantAggregator(LLMContextAggregator):
            )
        )

+    async def _handle_standalone_thought_signature(self, frame: LLMThoughtSignatureFrame):
+        """Append a standalone thought signature to the context.
+
+        Args:
+            frame: Frame whose `llm` and `signature` are stored in an
+                `LLMSpecificMessage` of type "thought_signature".
+        """
+        payload = {"type": "thought_signature", "signature": frame.signature}
+        signature_message = LLMSpecificMessage(llm=frame.llm, message=payload)
+        self._context.add_message(signature_message)
+
    def _context_updated_task_finished(self, task: asyncio.Task):
        self._context_updated_tasks.discard(task)

--- a/src/pipecat/services/google/llm.py
+++ b/src/pipecat/services/google/llm.py
@@ -35,6 +35,7 @@ from pipecat.frames.frames import (
    LLMMessagesFrame,
    LLMTextFrame,
    LLMThoughtEndFrame,
+    LLMThoughtSignatureFrame,
    LLMThoughtStartFrame,
    LLMThoughtTextFrame,
    LLMUpdateSettingsFrame,
@@ -1000,6 +1001,17 @@ class GoogleLLMService(LLMService):
                                )
                                await self.push_frame(frame)

+                            # With Gemini 3 Pro, a thought signature can be
+                            # attached to any kind of part, not just a function
+                            # call. It arrives in the last part of a response.
+                            if part.thought_signature and not part.function_call:
+                                await self.push_frame(
+                                    LLMThoughtSignatureFrame(
+                                        llm=self.get_llm_adapter().id_for_llm_specific_messages,
+                                        signature=part.thought_signature,
+                                    )
+                                )
+
                    if (
                        candidate.grounding_metadata
                        and candidate.grounding_metadata.grounding_chunks