Add support for Gemini 3 Pro non-function-call-related thought signatures
This commit is contained in:
@@ -108,8 +108,10 @@ async def run_bot(
|
||||
elif llm_provider == LLM_GOOGLE:
|
||||
llm = GoogleLLMService(
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
# model="gemini-3-pro-preview", # A more powerful reasoning model, but slower
|
||||
params=GoogleLLMService.InputParams(
|
||||
thinking=GoogleLLMService.ThinkingConfig(
|
||||
# thinking_level="low", # Use this field instead of thinking_budget for Gemini 3 Pro. Defaults to "high".
|
||||
thinking_budget=-1, # Dynamic thinking
|
||||
include_thoughts=True,
|
||||
)
|
||||
|
||||
@@ -87,8 +87,10 @@ async def run_bot(
|
||||
elif llm_provider == LLM_GOOGLE:
|
||||
llm = GoogleLLMService(
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
# model="gemini-3-pro-preview", # A more powerful reasoning model, but slower
|
||||
params=GoogleLLMService.InputParams(
|
||||
thinking=GoogleLLMService.ThinkingConfig(
|
||||
# thinking_level="low", # Use this field instead of thinking_budget for Gemini 3 Pro. Defaults to "high".
|
||||
thinking_budget=-1, # Dynamic thinking
|
||||
include_thoughts=True,
|
||||
)
|
||||
|
||||
@@ -210,6 +210,7 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
|
||||
system_instruction = None
|
||||
messages = []
|
||||
tool_call_id_to_name_mapping = {}
|
||||
non_fn_thought_signatures = []
|
||||
|
||||
# Process each message, preserving Google-formatted messages and converting others
|
||||
for message in universal_context_messages:
|
||||
@@ -234,6 +235,17 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
|
||||
)
|
||||
continue
|
||||
|
||||
# If we found a standalone non-function-call-related thought
|
||||
# signature (Gemini 3 Pro), store it to apply later to the
|
||||
# corresponding assistant message
|
||||
if (
|
||||
isinstance(result.content, dict)
|
||||
and result.content.get("type") == "thought_signature"
|
||||
and (thought_signature := result.content.get("signature"))
|
||||
):
|
||||
non_fn_thought_signatures.append(thought_signature)
|
||||
continue
|
||||
|
||||
# Each result is either a Content or a system instruction
|
||||
if result.content:
|
||||
messages.append(result.content)
|
||||
@@ -244,6 +256,10 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
|
||||
if result.tool_call_id_to_name_mapping:
|
||||
tool_call_id_to_name_mapping.update(result.tool_call_id_to_name_mapping)
|
||||
|
||||
# Apply non-function-call-related thought signatures to the appropriate
|
||||
# messages
|
||||
self._apply_non_function_thought_signatures_to_messages(non_fn_thought_signatures, messages)
|
||||
|
||||
# Check if we only have function-related messages (no regular text)
|
||||
has_regular_messages = any(
|
||||
len(msg.parts) == 1
|
||||
@@ -434,7 +450,7 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
|
||||
Args:
|
||||
thought_signature: The thought signature bytes to apply.
|
||||
tool_call_id: ID of the tool call message to find and modify.
|
||||
messages: List of Content messages to search through.
|
||||
messages: List of messages to search through.
|
||||
"""
|
||||
# Search backwards through messages to find the matching function call
|
||||
for message in reversed(messages):
|
||||
@@ -454,3 +470,46 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
|
||||
continue
|
||||
# Break outer loop if inner loop broke (found match)
|
||||
break
|
||||
|
||||
def _apply_non_function_thought_signatures_to_messages(
    self, thought_signatures: List[bytes], messages: List[Content]
) -> None:
    """Attach standalone thought signatures to plain assistant messages.

    Gemini 3 Pro outputs a thought signature at the end of each assistant
    response. Each signature is written onto the last part of one
    assistant ("model") message that contains no function call. Messages
    are modified in place; nothing is returned.

    Args:
        thought_signatures: Thought signature bytes, oldest first.
        messages: Messages to scan for eligible assistant messages.
    """
    # Nothing to attach: leave the messages untouched.
    if not thought_signatures:
        return

    def is_plain_model_message(candidate) -> bool:
        # Only non-empty Content objects authored by the model qualify...
        if not isinstance(candidate, Content) or not candidate.parts:
            return False
        if candidate.role != "model":
            return False
        # ...and only when none of their parts carries a function call.
        return not any(getattr(part, "function_call", None) for part in candidate.parts)

    targets = [candidate for candidate in messages if is_plain_model_message(candidate)]

    # A mismatch is reported but not treated as fatal.
    if len(targets) != len(thought_signatures):
        logger.warning(
            f"Thought signature count ({len(thought_signatures)}) doesn't match "
            f"non-function-call assistant message count ({len(targets)})"
        )

    # Pair signatures and messages positionally (oldest to newest); zip
    # stops at the shorter sequence, so any surplus on either side is
    # ignored.
    # NOTE(review): pairing is purely positional, so when the counts differ
    # a signature may land on a message other than the one that produced
    # it - confirm this is acceptable.
    for target, signature in zip(targets, thought_signatures):
        target.parts[-1].thought_signature = signature
|
||||
|
||||
@@ -594,7 +594,8 @@ class LLMThoughtStartFrame(ControlFrame):
|
||||
If it is appended, the `llm` field is required, since it will be
|
||||
appended as an `LLMSpecificMessage`.
|
||||
llm: Optional identifier of the LLM provider for LLM-specific handling.
|
||||
Only required if `append_to_context` is True.
|
||||
Only required if `append_to_context` is True, as the thought is
|
||||
appended to context as an `LLMSpecificMessage`.
|
||||
"""
|
||||
|
||||
append_to_context: bool = False
|
||||
@@ -642,7 +643,7 @@ class LLMThoughtEndFrame(ControlFrame):
|
||||
|
||||
Parameters:
|
||||
thought_metadata: Optional metadata associated with the thought,
|
||||
e.g. thought signature.
|
||||
e.g. an Anthropic thought signature.
|
||||
"""
|
||||
|
||||
thought_metadata: Optional[Dict[str, Any]] = None
|
||||
@@ -652,6 +653,28 @@ class LLMThoughtEndFrame(ControlFrame):
|
||||
return f"{self.name}(pts: {pts}, metadata: {self.thought_metadata})"
|
||||
|
||||
|
||||
@dataclass
class LLMThoughtSignatureFrame(DataFrame):
    """Frame carrying a standalone LLM thought signature.

    "Standalone" means the signature is not attached to a thought, unlike
    a thought signature associated with a thought. This is useful for
    Gemini 3 Pro, which can output a signature at the end of a response.

    Parameters:
        llm: Identifier of the LLM provider for LLM-specific handling.
            Needed because the thought signature is appended to context as
            an `LLMSpecificMessage`.
        signature: The thought signature data.
    """

    llm: str
    signature: Any

    def __str__(self):
        # Same layout as the original: frame name, formatted pts, signature.
        return f"{self.name}(pts: {format_pts(self.pts)}, signature: {self.signature})"
|
||||
|
||||
|
||||
@dataclass
|
||||
class LLMMessagesFrame(DataFrame):
|
||||
"""Frame containing LLM messages for chat completion.
|
||||
|
||||
@@ -48,6 +48,7 @@ from pipecat.frames.frames import (
|
||||
LLMSetToolChoiceFrame,
|
||||
LLMSetToolsFrame,
|
||||
LLMThoughtEndFrame,
|
||||
LLMThoughtSignatureFrame,
|
||||
LLMThoughtStartFrame,
|
||||
LLMThoughtTextFrame,
|
||||
SpeechControlParamsFrame,
|
||||
@@ -643,6 +644,8 @@ class LLMAssistantAggregator(LLMContextAggregator):
|
||||
await self._handle_thought_text(frame)
|
||||
elif isinstance(frame, LLMThoughtEndFrame):
|
||||
await self._handle_thought_end(frame)
|
||||
elif isinstance(frame, LLMThoughtSignatureFrame):
|
||||
await self._handle_standalone_thought_signature(frame)
|
||||
elif isinstance(frame, LLMRunFrame):
|
||||
await self._handle_llm_run(frame)
|
||||
elif isinstance(frame, LLMMessagesAppendFrame):
|
||||
@@ -907,6 +910,17 @@ class LLMAssistantAggregator(LLMContextAggregator):
|
||||
)
|
||||
)
|
||||
|
||||
async def _handle_standalone_thought_signature(self, frame: LLMThoughtSignatureFrame):
    """Append a standalone thought signature to the context.

    The signature is wrapped in an `LLMSpecificMessage` tagged with the
    frame's `llm` identifier and added to `self._context`.

    Args:
        frame: The frame holding the provider identifier and signature.
    """
    # The "type" key marks this entry as a thought signature.
    payload = {
        "type": "thought_signature",
        "signature": frame.signature,
    }
    signature_message = LLMSpecificMessage(llm=frame.llm, message=payload)
    self._context.add_message(signature_message)
|
||||
|
||||
def _context_updated_task_finished(self, task: asyncio.Task):
|
||||
self._context_updated_tasks.discard(task)
|
||||
|
||||
|
||||
@@ -35,6 +35,7 @@ from pipecat.frames.frames import (
|
||||
LLMMessagesFrame,
|
||||
LLMTextFrame,
|
||||
LLMThoughtEndFrame,
|
||||
LLMThoughtSignatureFrame,
|
||||
LLMThoughtStartFrame,
|
||||
LLMThoughtTextFrame,
|
||||
LLMUpdateSettingsFrame,
|
||||
@@ -1000,6 +1001,17 @@ class GoogleLLMService(LLMService):
|
||||
)
|
||||
await self.push_frame(frame)
|
||||
|
||||
# With Gemini 3 Pro, thought signatures can be
|
||||
# included in any kind of part, not just function
|
||||
# calls. It will come in the last part of a response.
|
||||
if part.thought_signature and not part.function_call:
|
||||
await self.push_frame(
|
||||
LLMThoughtSignatureFrame(
|
||||
llm=self.get_llm_adapter().id_for_llm_specific_messages,
|
||||
signature=part.thought_signature,
|
||||
)
|
||||
)
|
||||
|
||||
if (
|
||||
candidate.grounding_metadata
|
||||
and candidate.grounding_metadata.grounding_chunks
|
||||
|
||||
Reference in New Issue
Block a user