diff --git a/examples/foundational/07n-interruptible-google.py b/examples/foundational/07n-interruptible-google.py
index e8613e082..319c97f78 100644
--- a/examples/foundational/07n-interruptible-google.py
+++ b/examples/foundational/07n-interruptible-google.py
@@ -61,7 +61,12 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
         credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"),
     )
 
-    llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"))
+    llm = GoogleLLMService(
+        api_key=os.getenv("GOOGLE_API_KEY"),
+        model="gemini-2.5-flash",
+        # turn on thinking if you want it
+        # params=GoogleLLMService.InputParams(extra={"thinking_config": {"thinking_budget": 4096}}),
+    )
 
     messages = [
         {
diff --git a/examples/foundational/07s-interruptible-google-audio-in.py b/examples/foundational/07s-interruptible-google-audio-in.py
index 9a7aa24b1..67701c53b 100644
--- a/examples/foundational/07s-interruptible-google-audio-in.py
+++ b/examples/foundational/07s-interruptible-google-audio-in.py
@@ -214,7 +214,12 @@ transport_params = {
 async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
     logger.info(f"Starting bot")
 
-    llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"), model="gemini-2.0-flash-001")
+    llm = GoogleLLMService(
+        api_key=os.getenv("GOOGLE_API_KEY"),
+        model="gemini-2.5-flash",
+        # turn on thinking if you want it
+        # params=GoogleLLMService.InputParams(extra={"thinking_config": {"thinking_budget": 4096}}),
+    )
 
     tts = GoogleTTSService(
         voice_id="en-US-Chirp3-HD-Charon",
diff --git a/src/pipecat/metrics/metrics.py b/src/pipecat/metrics/metrics.py
index 262254ffd..fbd5f9c8c 100644
--- a/src/pipecat/metrics/metrics.py
+++ b/src/pipecat/metrics/metrics.py
@@ -22,6 +22,7 @@ class LLMTokenUsage(BaseModel):
     total_tokens: int
     cache_read_input_tokens: Optional[int] = None
     cache_creation_input_tokens: Optional[int] = None
+    reasoning_tokens: Optional[int] = None
 
 
 class LLMUsageMetricsData(MetricsData):
diff --git a/src/pipecat/processors/metrics/frame_processor_metrics.py b/src/pipecat/processors/metrics/frame_processor_metrics.py
index 386164afe..fd93241ed 100644
--- a/src/pipecat/processors/metrics/frame_processor_metrics.py
+++ b/src/pipecat/processors/metrics/frame_processor_metrics.py
@@ -165,9 +165,12 @@ class FrameProcessorMetrics(BaseObject):
         Returns:
             MetricsFrame containing LLM usage data.
         """
-        logger.debug(
-            f"{self._processor_name()} prompt tokens: {tokens.prompt_tokens}, completion tokens: {tokens.completion_tokens}"
-        )
+        logstr = f"{self._processor_name()} prompt tokens: {tokens.prompt_tokens}, completion tokens: {tokens.completion_tokens}"
+        if tokens.cache_read_input_tokens:
+            logstr += f", cache read input tokens: {tokens.cache_read_input_tokens}"
+        if tokens.reasoning_tokens:
+            logstr += f", reasoning tokens: {tokens.reasoning_tokens}"
+        logger.debug(logstr)
         value = LLMUsageMetricsData(
             processor=self._processor_name(), model=self._model_name(), value=tokens
         )
diff --git a/src/pipecat/services/google/llm.py b/src/pipecat/services/google/llm.py
index bd56b8416..86ed4dd88 100644
--- a/src/pipecat/services/google/llm.py
+++ b/src/pipecat/services/google/llm.py
@@ -638,6 +638,20 @@ class GoogleLLMService(LLMService):
     def _create_client(self, api_key: str):
         self._client = genai.Client(api_key=api_key)
 
+    def _maybe_unset_thinking_budget(self, generation_params: Dict[str, Any]):
+        """Disable thinking by default on gemini-2.5-flash models (mutates generation_params)."""
+        try:
+            # Model capabilities cannot be introspected, so match by name the
+            # models known to default to thinking on and to allow turning it off.
+            if not self._model_name.startswith("gemini-2.5-flash"):
+                return
+            # If thinking_config is already set, don't override it.
+            if "thinking_config" in generation_params:
+                return
+            generation_params["thinking_config"] = {"thinking_budget": 0}
+        except Exception as e:
+            logger.exception(f"Failed to unset thinking budget: {e}")
+
     @traced_llm
     async def _process_context(self, context: OpenAILLMContext):
         await self.push_frame(LLMFullResponseStartFrame())
@@ -645,6 +659,8 @@ class GoogleLLMService(LLMService):
         prompt_tokens = 0
         completion_tokens = 0
         total_tokens = 0
+        cache_read_input_tokens = 0
+        reasoning_tokens = 0
 
         grounding_metadata = None
         search_result = ""
@@ -684,6 +700,12 @@ class GoogleLLMService(LLMService):
                 if v is not None
             }
 
+            if self._settings["extra"]:
+                generation_params.update(self._settings["extra"])
+
+            # possibly modify generation_params (in place) to set thinking to off by default
+            self._maybe_unset_thinking_budget(generation_params)
+
             generation_config = (
                 GenerateContentConfig(**generation_params) if generation_params else None
             )
@@ -703,6 +725,8 @@ class GoogleLLMService(LLMService):
                     prompt_tokens += chunk.usage_metadata.prompt_token_count or 0
                     completion_tokens += chunk.usage_metadata.candidates_token_count or 0
                     total_tokens += chunk.usage_metadata.total_token_count or 0
+                    cache_read_input_tokens += chunk.usage_metadata.cached_content_token_count or 0
+                    reasoning_tokens += chunk.usage_metadata.thoughts_token_count or 0
 
                 if not chunk.candidates:
                     continue
@@ -784,6 +808,8 @@ class GoogleLLMService(LLMService):
                     prompt_tokens=prompt_tokens,
                     completion_tokens=completion_tokens,
                     total_tokens=total_tokens,
+                    cache_read_input_tokens=cache_read_input_tokens,
+                    reasoning_tokens=reasoning_tokens,
                 )
             )
             await self.push_frame(LLMFullResponseEndFrame())