From c8c6f424cd6a02f57e72b9f984db62c958447da4 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Thu, 4 Dec 2025 14:29:08 -0500 Subject: [PATCH] Add support for Gemini 3 Pro non-function-call-related thought signatures --- .../foundational/49-thinking-functions.py | 2 + examples/foundational/49-thinking.py | 2 + .../adapters/services/gemini_adapter.py | 61 ++++++++++++++++++- src/pipecat/frames/frames.py | 27 +++++++- .../aggregators/llm_response_universal.py | 14 +++++ src/pipecat/services/google/llm.py | 12 ++++ 6 files changed, 115 insertions(+), 3 deletions(-) diff --git a/examples/foundational/49-thinking-functions.py b/examples/foundational/49-thinking-functions.py index 2b96e304d..ffc93dd04 100644 --- a/examples/foundational/49-thinking-functions.py +++ b/examples/foundational/49-thinking-functions.py @@ -108,8 +108,10 @@ async def run_bot( elif llm_provider == LLM_GOOGLE: llm = GoogleLLMService( api_key=os.getenv("GOOGLE_API_KEY"), + # model="gemini-3-pro-preview", # A more powerful reasoning model, but slower params=GoogleLLMService.InputParams( thinking=GoogleLLMService.ThinkingConfig( + # thinking_level="low", # Use this field instead of thinking_budget for Gemini 3 Pro. Defaults to "high". thinking_budget=-1, # Dynamic thinking include_thoughts=True, ) diff --git a/examples/foundational/49-thinking.py b/examples/foundational/49-thinking.py index 512163be4..8b9421e1c 100644 --- a/examples/foundational/49-thinking.py +++ b/examples/foundational/49-thinking.py @@ -87,8 +87,10 @@ async def run_bot( elif llm_provider == LLM_GOOGLE: llm = GoogleLLMService( api_key=os.getenv("GOOGLE_API_KEY"), + # model="gemini-3-pro-preview", # A more powerful reasoning model, but slower params=GoogleLLMService.InputParams( thinking=GoogleLLMService.ThinkingConfig( + # thinking_level="low", # Use this field instead of thinking_budget for Gemini 3 Pro. Defaults to "high". thinking_budget=-1, # Dynamic thinking include_thoughts=True, ) diff --git a/src/pipecat/adapters/services/gemini_adapter.py b/src/pipecat/adapters/services/gemini_adapter.py index fd91b818f..a81131fce 100644 --- a/src/pipecat/adapters/services/gemini_adapter.py +++ b/src/pipecat/adapters/services/gemini_adapter.py @@ -210,6 +210,7 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]): system_instruction = None messages = [] tool_call_id_to_name_mapping = {} + non_fn_thought_signatures = [] # Process each message, preserving Google-formatted messages and converting others for message in universal_context_messages: @@ -234,6 +235,17 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]): ) continue + # If we found a standalone non-function-call-related thought + # signature (Gemini 3 Pro), store it to apply later to the + # corresponding assistant message + if ( + isinstance(result.content, dict) + and result.content.get("type") == "thought_signature" + and (thought_signature := result.content.get("signature")) + ): + non_fn_thought_signatures.append(thought_signature) + continue + # Each result is either a Content or a system instruction if result.content: messages.append(result.content) @@ -244,6 +256,10 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]): if result.tool_call_id_to_name_mapping: tool_call_id_to_name_mapping.update(result.tool_call_id_to_name_mapping) + # Apply non-function-call-related thought signatures to the appropriate + # messages + self._apply_non_function_thought_signatures_to_messages(non_fn_thought_signatures, messages) + # Check if we only have function-related messages (no regular text) has_regular_messages = any( len(msg.parts) == 1 @@ -434,7 +450,7 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]): Args: thought_signature: The thought signature bytes to apply. tool_call_id: ID of the tool call message to find and modify. - messages: List of Content messages to search through. + messages: List of messages to search through. """ # Search backwards through messages to find the matching function call for message in reversed(messages): @@ -454,3 +470,46 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]): continue # Break outer loop if inner loop broke (found match) break + + def _apply_non_function_thought_signatures_to_messages( + self, thought_signatures: List[bytes], messages: List[Content] + ) -> None: + """Apply non-function-call-related thought signatures to the last part of each non-function-call assistant message. + + Gemini 3 Pro outputs a thought signature at the end of each assistant + response. + + Args: + thought_signatures: The list of thought signature bytes to apply. + messages: List of messages to search through. + """ + if not thought_signatures: + return + + # Find all assistant (model) messages that aren't function calls + non_fn_assistant_messages = [] + for message in messages: + if not isinstance(message, Content) or not message.parts: + continue + # Check if this is a model message without function calls + if message.role == "model": + has_function_call = any( + hasattr(part, "function_call") and part.function_call for part in message.parts + ) + if not has_function_call: + non_fn_assistant_messages.append(message) + + # Warn if counts don't match + if len(thought_signatures) != len(non_fn_assistant_messages): + logger.warning( + f"Thought signature count ({len(thought_signatures)}) doesn't match " + f"non-function-call assistant message count ({len(non_fn_assistant_messages)})" + ) + + # Apply thought signatures to the corresponding assistant messages + # Match them in order (oldest to newest) + for i, thought_signature in enumerate(thought_signatures): + if i < len(non_fn_assistant_messages): + message = non_fn_assistant_messages[i] + if message.parts: + message.parts[-1].thought_signature = thought_signature diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index 2c3b802ec..d538ce6b4 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -594,7 +594,8 @@ class LLMThoughtStartFrame(ControlFrame): If it is appended, the `llm` field is required, since it will be appended as an `LLMSpecificMessage`. llm: Optional identifier of the LLM provider for LLM-specific handling. - Only required if `append_to_context` is True. + Only required if `append_to_context` is True, as the thought is + appended to context as an `LLMSpecificMessage`. """ append_to_context: bool = False @@ -642,7 +643,7 @@ class LLMThoughtEndFrame(ControlFrame): Parameters: thought_metadata: Optional metadata associated with the thought, - e.g. thought signature. + e.g. an Anthropic thought signature. """ thought_metadata: Optional[Dict[str, Any]] = None @@ -652,6 +653,28 @@ class LLMThoughtEndFrame(ControlFrame): return f"{self.name}(pts: {pts}, metadata: {self.thought_metadata})" +@dataclass +class LLMThoughtSignatureFrame(DataFrame): + """Frame containing a standalone LLM thought signature (as opposed to a thought signature associated with a thought). + + This is useful for Gemini 3 Pro, which can output a signature at the end of + a response. + + Parameters: + llm: Identifier of the LLM provider for LLM-specific handling. + Needed because the thought signature is appended to context as an + `LLMSpecificMessage`. + signature: The thought signature data. + """ + + llm: str + signature: Any + + def __str__(self): + pts = format_pts(self.pts) + return f"{self.name}(pts: {pts}, signature: {self.signature})" + + @dataclass class LLMMessagesFrame(DataFrame): """Frame containing LLM messages for chat completion. diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index 04f9c4ae9..4a074da93 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -48,6 +48,7 @@ from pipecat.frames.frames import ( LLMSetToolChoiceFrame, LLMSetToolsFrame, LLMThoughtEndFrame, + LLMThoughtSignatureFrame, LLMThoughtStartFrame, LLMThoughtTextFrame, SpeechControlParamsFrame, @@ -643,6 +644,8 @@ class LLMAssistantAggregator(LLMContextAggregator): await self._handle_thought_text(frame) elif isinstance(frame, LLMThoughtEndFrame): await self._handle_thought_end(frame) + elif isinstance(frame, LLMThoughtSignatureFrame): + await self._handle_standalone_thought_signature(frame) elif isinstance(frame, LLMRunFrame): await self._handle_llm_run(frame) elif isinstance(frame, LLMMessagesAppendFrame): @@ -907,6 +910,17 @@ class LLMAssistantAggregator(LLMContextAggregator): ) ) + async def _handle_standalone_thought_signature(self, frame: LLMThoughtSignatureFrame): + self._context.add_message( + LLMSpecificMessage( + llm=frame.llm, + message={ + "type": "thought_signature", + "signature": frame.signature, + }, + ) + ) + def _context_updated_task_finished(self, task: asyncio.Task): self._context_updated_tasks.discard(task) diff --git a/src/pipecat/services/google/llm.py b/src/pipecat/services/google/llm.py index 114df5f28..408918287 100644 --- a/src/pipecat/services/google/llm.py +++ b/src/pipecat/services/google/llm.py @@ -35,6 +35,7 @@ from pipecat.frames.frames import ( LLMMessagesFrame, LLMTextFrame, LLMThoughtEndFrame, + LLMThoughtSignatureFrame, LLMThoughtStartFrame, LLMThoughtTextFrame, LLMUpdateSettingsFrame, @@ -1000,6 +1001,17 @@ class GoogleLLMService(LLMService): ) await self.push_frame(frame) + # With Gemini 3 Pro, thought signatures can be + # included in any kind of part, not just function + # calls. It will come in the last part of a response. + if part.thought_signature and not part.function_call: + await self.push_frame( + LLMThoughtSignatureFrame( + llm=self.get_llm_adapter().id_for_llm_specific_messages, + signature=part.thought_signature, + ) + ) + if ( candidate.grounding_metadata and candidate.grounding_metadata.grounding_chunks