From 419c7d44503a4624a83d965438d783c3ba5b1997 Mon Sep 17 00:00:00 2001
From: Om Chauhan <omchauhan64408@gmail.com>
Date: Wed, 18 Mar 2026 09:33:54 +0530
Subject: [PATCH 1/2] fix: default thinking config for Gemini 3+ Flash models

---
 src/pipecat/services/google/llm.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/pipecat/services/google/llm.py b/src/pipecat/services/google/llm.py
index 26ad46311..f80458d93 100644
--- a/src/pipecat/services/google/llm.py
+++ b/src/pipecat/services/google/llm.py
@@ -995,18 +995,20 @@ class GoogleLLMService(LLMService):
 
     def _maybe_unset_thinking_budget(self, generation_params: Dict[str, Any]):
         try:
-            # There's no way to introspect on model capabilities, so
-            # to check for models that we know default to thinkin on
-            # and can be configured to turn it off.
-            if not self._settings.model.startswith("gemini-2.5-flash"):
-                return
-            # If we have an image model, we don't use a budget either.
+            # If we have an image model, we don't apply a thinking default.
             if "image" in self._settings.model:
                 return
             # If thinking_config is already set, don't override it.
             if "thinking_config" in generation_params:
                 return
-            generation_params.setdefault("thinking_config", {})["thinking_budget"] = 0
+            # Apply low-latency thinking defaults; other models are left untouched.
+            # Gemini 2.5 Flash: disable thinking (thinking_budget=0).
+            # Gemini 3.x Flash: minimal thinking (thinking_level="minimal").
+            model = self._settings.model
+            if model.startswith("gemini-2.5-flash"):
+                generation_params["thinking_config"] = {"thinking_budget": 0}
+            elif model.startswith("gemini-3") and "flash" in model:
+                generation_params["thinking_config"] = {"thinking_level": "minimal"}
         except Exception as e:
             logger.error(f"Failed to unset thinking budget: {e}")
 

From fa982a05c091b62c06e4a447b12766660a6cf145 Mon Sep 17 00:00:00 2001
From: Om Chauhan <omchauhan64408@gmail.com>
Date: Wed, 18 Mar 2026 09:46:15 +0530
Subject: [PATCH 2/2] docs: add changelog entry for Gemini 3 Flash thinking default

---
 changelog/4067.fixed.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog/4067.fixed.md

diff --git a/changelog/4067.fixed.md b/changelog/4067.fixed.md
new file mode 100644
index 000000000..b913b90d1
--- /dev/null
+++ b/changelog/4067.fixed.md
@@ -0,0 +1 @@
+- Fixed `GoogleLLMService` not applying a low-latency thinking default to Gemini 3.x Flash models: it now sets `thinking_level="minimal"` for them unless a `thinking_config` is already provided.