diff --git a/CHANGELOG.md b/CHANGELOG.md
index afba77a64..3d41cb420 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -78,6 +78,9 @@ stt = DeepgramSTTService(..., live_options=LiveOptions(model="nova-2-general"))
 
 ### Fixed
 
+- Fixed a `GoogleLLMService` issue that was causing an exception when sending
+  inline audio in some cases.
+
 - Fixed an `AudioContextWordTTSService` issue that would cause an `EndFrame` to
   disconnect from the TTS service before audio from all the contexts was
   received. This affected services like Cartesia and Rime.
diff --git a/src/pipecat/services/google/google.py b/src/pipecat/services/google/google.py
index 1dafc92bc..c0941ee33 100644
--- a/src/pipecat/services/google/google.py
+++ b/src/pipecat/services/google/google.py
@@ -722,7 +722,9 @@ class GoogleLLMContext(OpenAILLMContext):
 
         self.add_message(glm.Content(role="user", parts=parts))
 
-    def add_audio_frames_message(self, *, audio_frames: list[AudioRawFrame], text: str = None):
+    def add_audio_frames_message(
+        self, *, audio_frames: list[AudioRawFrame], text: str = "Audio follows"
+    ):
         if not audio_frames:
             return
 
@@ -731,8 +733,9 @@ class GoogleLLMContext(OpenAILLMContext):
 
         parts = []
         data = b"".join(frame.audio for frame in audio_frames)
-        if text:
-            parts.append(glm.Part(text=text))
+        # NOTE(aleix): According to the docs only text or inline_data should be needed.
+        # (see https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference)
+        parts.append(glm.Part(text=text))
         parts.append(
             glm.Part(
                 inline_data=glm.Blob(