diff --git a/engine/text_input.py b/engine/text_input.py
index f9b5c4e..2cc8007 100644
--- a/engine/text_input.py
+++ b/engine/text_input.py
@@ -6,6 +6,7 @@ from pipecat.frames.frames import (
     Frame,
     InputTransportMessageFrame,
     LLMMessagesAppendFrame,
+    UserImageRawFrame,
     UserStartedSpeakingFrame,
     UserStoppedSpeakingFrame,
 )
@@ -13,11 +14,17 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 
 
 class ProductTextInputProcessor(FrameProcessor):
-    """Converts product text-input transport messages into LLM turns."""
+    """Converts product text-input transport messages into LLM turns and marks image input as user activity."""
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
         await super().process_frame(frame, direction)
 
+        if isinstance(frame, UserImageRawFrame):
+            await self.broadcast_frame(UserStartedSpeakingFrame)
+            await self.push_frame(frame, direction)
+            await self.broadcast_frame(UserStoppedSpeakingFrame)
+            return
+
         if not isinstance(frame, InputTransportMessageFrame):
             await self.push_frame(frame, direction)
             return