diff --git a/engine/text_input.py b/engine/text_input.py index f9b5c4e..2cc8007 100644 --- a/engine/text_input.py +++ b/engine/text_input.py @@ -6,6 +6,7 @@ from pipecat.frames.frames import ( Frame, InputTransportMessageFrame, LLMMessagesAppendFrame, + UserImageRawFrame, UserStartedSpeakingFrame, UserStoppedSpeakingFrame, ) @@ -13,11 +14,17 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor class ProductTextInputProcessor(FrameProcessor): - """Converts product text-input transport messages into LLM turns.""" + """Converts product text-input transport messages and marks image input as user activity.""" async def process_frame(self, frame: Frame, direction: FrameDirection): await super().process_frame(frame, direction) + if isinstance(frame, UserImageRawFrame): + await self.broadcast_frame(UserStartedSpeakingFrame) + await self.push_frame(frame, direction) + await self.broadcast_frame(UserStoppedSpeakingFrame) + return + if not isinstance(frame, InputTransportMessageFrame): await self.push_frame(frame, direction) return