Add support for Gemini 3 Pro non-function-call-related thought signatures

This commit is contained in:
Paul Kompfner
2025-12-04 14:29:08 -05:00
parent 0cdf0c4504
commit c8c6f424cd
6 changed files with 115 additions and 3 deletions

View File

@@ -108,8 +108,10 @@ async def run_bot(
elif llm_provider == LLM_GOOGLE:
llm = GoogleLLMService(
api_key=os.getenv("GOOGLE_API_KEY"),
# model="gemini-3-pro-preview", # A more powerful reasoning model, but slower
params=GoogleLLMService.InputParams(
thinking=GoogleLLMService.ThinkingConfig(
# thinking_level="low", # Use this field instead of thinking_budget for Gemini 3 Pro. Defaults to "high".
thinking_budget=-1, # Dynamic thinking
include_thoughts=True,
)

View File

@@ -87,8 +87,10 @@ async def run_bot(
elif llm_provider == LLM_GOOGLE:
llm = GoogleLLMService(
api_key=os.getenv("GOOGLE_API_KEY"),
# model="gemini-3-pro-preview", # A more powerful reasoning model, but slower
params=GoogleLLMService.InputParams(
thinking=GoogleLLMService.ThinkingConfig(
# thinking_level="low", # Use this field instead of thinking_budget for Gemini 3 Pro. Defaults to "high".
thinking_budget=-1, # Dynamic thinking
include_thoughts=True,
)

View File

@@ -210,6 +210,7 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
system_instruction = None
messages = []
tool_call_id_to_name_mapping = {}
non_fn_thought_signatures = []
# Process each message, preserving Google-formatted messages and converting others
for message in universal_context_messages:
@@ -234,6 +235,17 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
)
continue
# If we found a standalone non-function-call-related thought
# signature (Gemini 3 Pro), store it to apply later to the
# corresponding assistant message
if (
isinstance(result.content, dict)
and result.content.get("type") == "thought_signature"
and (thought_signature := result.content.get("signature"))
):
non_fn_thought_signatures.append(thought_signature)
continue
# Each result is either a Content or a system instruction
if result.content:
messages.append(result.content)
@@ -244,6 +256,10 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
if result.tool_call_id_to_name_mapping:
tool_call_id_to_name_mapping.update(result.tool_call_id_to_name_mapping)
# Apply non-function-call-related thought signatures to the appropriate
# messages
self._apply_non_function_thought_signatures_to_messages(non_fn_thought_signatures, messages)
# Check if we only have function-related messages (no regular text)
has_regular_messages = any(
len(msg.parts) == 1
@@ -434,7 +450,7 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
Args:
thought_signature: The thought signature bytes to apply.
tool_call_id: ID of the tool call message to find and modify.
messages: List of Content messages to search through.
messages: List of messages to search through.
"""
# Search backwards through messages to find the matching function call
for message in reversed(messages):
@@ -454,3 +470,46 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
continue
# Break outer loop if inner loop broke (found match)
break
def _apply_non_function_thought_signatures_to_messages(
    self, thought_signatures: List[bytes], messages: List[Content]
) -> None:
    """Attach standalone thought signatures to plain assistant messages.

    Gemini 3 Pro outputs a thought signature at the end of each assistant
    response. Each signature is written to the last part of one assistant
    (model) message that contains no function call; signatures and messages
    are paired in list order.

    Args:
        thought_signatures: The list of thought signature bytes to apply.
        messages: List of messages to search through. Matching messages are
            modified in place.
    """
    if not thought_signatures:
        return

    # Keep only model-role Content messages that have parts and in which no
    # part carries a function call.
    candidates = [
        msg
        for msg in messages
        if isinstance(msg, Content)
        and msg.parts
        and msg.role == "model"
        and not any(
            hasattr(part, "function_call") and part.function_call for part in msg.parts
        )
    ]

    # A count mismatch is only reported; pairing below still proceeds.
    signature_count = len(thought_signatures)
    candidate_count = len(candidates)
    if signature_count != candidate_count:
        logger.warning(
            f"Thought signature count ({signature_count}) doesn't match "
            f"non-function-call assistant message count ({candidate_count})"
        )

    # Pair oldest to newest. zip() stops at the shorter sequence, so any
    # surplus signatures or surplus messages are left untouched.
    for target, signature in zip(candidates, thought_signatures):
        target.parts[-1].thought_signature = signature

View File

@@ -594,7 +594,8 @@ class LLMThoughtStartFrame(ControlFrame):
If it is appended, the `llm` field is required, since it will be
appended as an `LLMSpecificMessage`.
llm: Optional identifier of the LLM provider for LLM-specific handling.
Only required if `append_to_context` is True.
Only required if `append_to_context` is True, as the thought is
appended to context as an `LLMSpecificMessage`.
"""
append_to_context: bool = False
@@ -642,7 +643,7 @@ class LLMThoughtEndFrame(ControlFrame):
Parameters:
thought_metadata: Optional metadata associated with the thought,
e.g. thought signature.
e.g. an Anthropic thought signature.
"""
thought_metadata: Optional[Dict[str, Any]] = None
@@ -652,6 +653,28 @@ class LLMThoughtEndFrame(ControlFrame):
return f"{self.name}(pts: {pts}, metadata: {self.thought_metadata})"
@dataclass
class LLMThoughtSignatureFrame(DataFrame):
    """Frame containing a standalone LLM thought signature.

    "Standalone" is as opposed to a thought signature associated with a
    thought. This is useful for Gemini 3 Pro, which can output a signature at
    the end of a response.

    Parameters:
        llm: Identifier of the LLM provider for LLM-specific handling.
            Needed because the thought signature is appended to context as an
            `LLMSpecificMessage`.
        signature: The thought signature data.
    """

    llm: str
    signature: Any

    def __str__(self):
        return f"{self.name}(pts: {format_pts(self.pts)}, signature: {self.signature})"
@dataclass
class LLMMessagesFrame(DataFrame):
"""Frame containing LLM messages for chat completion.

View File

@@ -48,6 +48,7 @@ from pipecat.frames.frames import (
LLMSetToolChoiceFrame,
LLMSetToolsFrame,
LLMThoughtEndFrame,
LLMThoughtSignatureFrame,
LLMThoughtStartFrame,
LLMThoughtTextFrame,
SpeechControlParamsFrame,
@@ -643,6 +644,8 @@ class LLMAssistantAggregator(LLMContextAggregator):
await self._handle_thought_text(frame)
elif isinstance(frame, LLMThoughtEndFrame):
await self._handle_thought_end(frame)
elif isinstance(frame, LLMThoughtSignatureFrame):
await self._handle_standalone_thought_signature(frame)
elif isinstance(frame, LLMRunFrame):
await self._handle_llm_run(frame)
elif isinstance(frame, LLMMessagesAppendFrame):
@@ -907,6 +910,17 @@ class LLMAssistantAggregator(LLMContextAggregator):
)
)
async def _handle_standalone_thought_signature(self, frame: LLMThoughtSignatureFrame):
    """Record a standalone thought signature in the context.

    The signature is wrapped in an `LLMSpecificMessage` tagged with the
    frame's `llm` identifier and added to the context.

    Args:
        frame: The frame holding the LLM identifier and the signature.
    """
    payload = {"type": "thought_signature", "signature": frame.signature}
    signature_message = LLMSpecificMessage(llm=frame.llm, message=payload)
    self._context.add_message(signature_message)
def _context_updated_task_finished(self, task: asyncio.Task):
self._context_updated_tasks.discard(task)

View File

@@ -35,6 +35,7 @@ from pipecat.frames.frames import (
LLMMessagesFrame,
LLMTextFrame,
LLMThoughtEndFrame,
LLMThoughtSignatureFrame,
LLMThoughtStartFrame,
LLMThoughtTextFrame,
LLMUpdateSettingsFrame,
@@ -1000,6 +1001,17 @@ class GoogleLLMService(LLMService):
)
await self.push_frame(frame)
# With Gemini 3 Pro, a thought signature can be included
# in any kind of part, not just function calls. It comes
# in the last part of a response.
if part.thought_signature and not part.function_call:
await self.push_frame(
LLMThoughtSignatureFrame(
llm=self.get_llm_adapter().id_for_llm_specific_messages,
signature=part.thought_signature,
)
)
if (
candidate.grounding_metadata
and candidate.grounding_metadata.grounding_chunks