diff --git a/changelog/3748.added.md b/changelog/3748.added.md
new file mode 100644
index 000000000..223f8bf4b
--- /dev/null
+++ b/changelog/3748.added.md
@@ -0,0 +1 @@
+- Added `UserIdleTimeoutUpdateFrame` to enable or disable user idle detection at runtime by updating the timeout dynamically.
diff --git a/changelog/3748.changed.md b/changelog/3748.changed.md
new file mode 100644
index 000000000..61be61c6b
--- /dev/null
+++ b/changelog/3748.changed.md
@@ -0,0 +1 @@
+- `UserIdleController` is now always created with a default timeout of 0 (disabled). The `user_idle_timeout` parameter changed from `Optional[float] = None` to `float = 0` in `UserTurnProcessor`, `LLMUserAggregatorParams`, and `UserIdleController`.
diff --git a/examples/foundational/17-detect-user-idle.py b/examples/foundational/17-detect-user-idle.py
index bb0ea8873..e6af5a364 100644
--- a/examples/foundational/17-detect-user-idle.py
+++ b/examples/foundational/17-detect-user-idle.py
@@ -19,6 +19,7 @@ from pipecat.frames.frames import (
     LLMMessagesAppendFrame,
     LLMRunFrame,
     TTSSpeakFrame,
+    UserIdleTimeoutUpdateFrame,
 )
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
@@ -210,6 +211,13 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
         # Kick off the conversation.
         messages.append({"role": "system", "content": "Please introduce yourself to the user."})
         await task.queue_frames([LLMRunFrame()])
+        # Demo only: toggle idle detection off and back on at runtime.
+        await asyncio.sleep(30)
+        logger.info("Disabling idle detection")
+        await task.queue_frames([UserIdleTimeoutUpdateFrame(timeout=0)])
+        await asyncio.sleep(30)
+        logger.info("Enabling idle detection")
+        await task.queue_frames([UserIdleTimeoutUpdateFrame(timeout=5)])
 
     @transport.event_handler("on_client_disconnected")
     async def on_client_disconnected(transport, client):
diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py
index 8d237defc..c6c4421cd 100644
--- a/src/pipecat/frames/frames.py
+++ b/src/pipecat/frames/frames.py
@@ -2145,6 +2145,20 @@ class STTUpdateSettingsFrame(ServiceUpdateSettingsFrame):
     pass
 
 
+@dataclass
+class UserIdleTimeoutUpdateFrame(SystemFrame):
+    """Frame for updating the user idle timeout at runtime.
+
+    Setting timeout to 0 disables idle detection. Setting a positive value
+    enables it.
+
+    Parameters:
+        timeout: The new idle timeout in seconds. 0 disables idle detection.
+    """
+
+    timeout: float
+
+
 @dataclass
 class VADParamsUpdateFrame(ControlFrame):
     """Frame for updating VAD parameters.
diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py
index 0fb538b1a..f05f064fb 100644
--- a/src/pipecat/processors/aggregators/llm_response_universal.py
+++ b/src/pipecat/processors/aggregators/llm_response_universal.py
@@ -92,9 +92,9 @@ class LLMUserAggregatorParams:
         user_mute_strategies: List of user mute strategies.
         user_turn_stop_timeout: Time in seconds to wait before considering the user's turn
             finished.
-        user_idle_timeout: Optional timeout in seconds for detecting user idle state.
-            If set, the aggregator will emit an `on_user_turn_idle` event when the user
-            has been idle (not speaking) for this duration. Set to None to disable
+        user_idle_timeout: Timeout in seconds for detecting user idle state.
+            The aggregator will emit an `on_user_turn_idle` event when the user
+            has been idle (not speaking) for this duration. Set to 0 to disable
             idle detection.
         vad_analyzer: Voice Activity Detection analyzer instance.
         filter_incomplete_user_turns: Whether to filter out incomplete user turns.
@@ -109,7 +109,7 @@ class LLMUserAggregatorParams:
     user_turn_strategies: Optional[UserTurnStrategies] = None
     user_mute_strategies: List[BaseUserMuteStrategy] = field(default_factory=list)
     user_turn_stop_timeout: float = 5.0
-    user_idle_timeout: Optional[float] = None
+    user_idle_timeout: float = 0
     vad_analyzer: Optional[VADAnalyzer] = None
     filter_incomplete_user_turns: bool = False
     user_turn_completion_config: Optional[UserTurnCompletionConfig] = None
@@ -404,15 +404,10 @@ class LLMUserAggregator(LLMContextAggregator):
             "on_user_turn_stop_timeout", self._on_user_turn_stop_timeout
         )
 
-        # Optional user idle controller
-        self._user_idle_controller: Optional[UserIdleController] = None
-        if self._params.user_idle_timeout:
-            self._user_idle_controller = UserIdleController(
-                user_idle_timeout=self._params.user_idle_timeout
-            )
-            self._user_idle_controller.add_event_handler(
-                "on_user_turn_idle", self._on_user_turn_idle
-            )
+        self._user_idle_controller = UserIdleController(
+            user_idle_timeout=self._params.user_idle_timeout
+        )
+        self._user_idle_controller.add_event_handler("on_user_turn_idle", self._on_user_turn_idle)
 
         # VAD controller
         self._vad_controller: Optional[VADController] = None
@@ -489,8 +484,7 @@ class LLMUserAggregator(LLMContextAggregator):
 
         await self._user_turn_controller.process_frame(frame)
 
-        if self._user_idle_controller:
-            await self._user_idle_controller.process_frame(frame)
+        await self._user_idle_controller.process_frame(frame)
 
     async def push_aggregation(self) -> str:
         """Push the current aggregation."""
@@ -507,8 +501,7 @@ class LLMUserAggregator(LLMContextAggregator):
     async def _start(self, frame: StartFrame):
         await self._user_turn_controller.setup(self.task_manager)
 
-        if self._user_idle_controller:
-            await self._user_idle_controller.setup(self.task_manager)
+        await self._user_idle_controller.setup(self.task_manager)
 
         for s in self._params.user_mute_strategies:
             await s.setup(self.task_manager)
@@ -541,9 +534,7 @@ class LLMUserAggregator(LLMContextAggregator):
 
     async def _cleanup(self):
         await self._user_turn_controller.cleanup()
-
-        if self._user_idle_controller:
-            await self._user_idle_controller.cleanup()
+        await self._user_idle_controller.cleanup()
 
         for s in self._params.user_mute_strategies:
             await s.cleanup()
@@ -689,8 +680,7 @@ class LLMUserAggregator(LLMContextAggregator):
         if params.enable_user_speaking_frames:
             await self.broadcast_frame(UserStartedSpeakingFrame)
 
-        if self._user_idle_controller:
-            await self._user_idle_controller.process_frame(UserStartedSpeakingFrame())
+        await self._user_idle_controller.process_frame(UserStartedSpeakingFrame())
 
         if params.enable_interruptions and self._allow_interruptions:
             await self.push_interruption_task_frame_and_wait()
@@ -708,8 +698,7 @@ class LLMUserAggregator(LLMContextAggregator):
         if params.enable_user_speaking_frames:
             await self.broadcast_frame(UserStoppedSpeakingFrame)
 
-        if self._user_idle_controller:
-            await self._user_idle_controller.process_frame(UserStoppedSpeakingFrame())
+        await self._user_idle_controller.process_frame(UserStoppedSpeakingFrame())
 
         await self._maybe_emit_user_turn_stopped(strategy)
 
diff --git a/src/pipecat/turns/user_idle_controller.py b/src/pipecat/turns/user_idle_controller.py
index b4dc80772..b3b7e8074 100644
--- a/src/pipecat/turns/user_idle_controller.py
+++ b/src/pipecat/turns/user_idle_controller.py
@@ -16,6 +16,7 @@ from pipecat.frames.frames import (
     FunctionCallCancelFrame,
     FunctionCallResultFrame,
     FunctionCallsStartedFrame,
+    UserIdleTimeoutUpdateFrame,
     UserStartedSpeakingFrame,
     UserStoppedSpeakingFrame,
 )
@@ -51,12 +52,13 @@ class UserIdleController(BaseObject):
     def __init__(
         self,
         *,
-        user_idle_timeout: float,
+        user_idle_timeout: float = 0,
     ):
         """Initialize the user idle controller.
 
         Args:
             user_idle_timeout: Timeout in seconds before considering the user idle.
+                0 disables idle detection.
         """
         super().__init__()
 
@@ -96,6 +98,14 @@ class UserIdleController(BaseObject):
         Args:
             frame: The frame to be processed.
         """
+        if isinstance(frame, UserIdleTimeoutUpdateFrame):
+            # Runtime reconfiguration: store the new timeout; a non-positive
+            # value disables detection, so cancel any timer already running.
+            self._user_idle_timeout = frame.timeout
+            if self._user_idle_timeout <= 0:
+                await self._cancel_idle_timer()
+            return
+
         if isinstance(frame, BotStoppedSpeakingFrame):
             # Only start the timer if the user isn't mid-turn and no function
             # calls are pending.
@@ -128,6 +138,9 @@ class UserIdleController(BaseObject):
 
     async def _start_idle_timer(self):
         """Start (or restart) the idle timer."""
+        # Idle detection is disabled for non-positive timeouts.
+        if self._user_idle_timeout <= 0:
+            return
         await self._cancel_idle_timer()
         self._idle_timer_task = self.task_manager.create_task(
             self._idle_timer_expired(),
diff --git a/src/pipecat/turns/user_turn_processor.py b/src/pipecat/turns/user_turn_processor.py
index 720a8b854..7f8995202 100644
--- a/src/pipecat/turns/user_turn_processor.py
+++ b/src/pipecat/turns/user_turn_processor.py
@@ -66,7 +66,7 @@ class UserTurnProcessor(FrameProcessor):
         *,
         user_turn_strategies: Optional[UserTurnStrategies] = None,
         user_turn_stop_timeout: float = 5.0,
-        user_idle_timeout: Optional[float] = None,
+        user_idle_timeout: float = 0,
         **kwargs,
     ):
         """Initialize the user turn processor.
@@ -75,9 +75,9 @@ class UserTurnProcessor(FrameProcessor):
             user_turn_strategies: Configured strategies for starting and stopping user turns.
             user_turn_stop_timeout: Timeout in seconds to automatically stop a user turn
                 if no activity is detected.
-            user_idle_timeout: Optional timeout in seconds for detecting user idle state.
-                If set, the processor will emit an `on_user_turn_idle` event when the user
-                has been idle (not speaking) for this duration. Set to None to disable
+            user_idle_timeout: Timeout in seconds for detecting user idle state.
+                The processor will emit an `on_user_turn_idle` event when the user
+                has been idle (not speaking) for this duration. Set to 0 to disable
                 idle detection.
             **kwargs: Additional keyword arguments.
         """
@@ -104,13 +104,8 @@ class UserTurnProcessor(FrameProcessor):
             "on_user_turn_stop_timeout", self._on_user_turn_stop_timeout
         )
 
-        # Optional user idle controller
-        self._user_idle_controller: Optional[UserIdleController] = None
-        if user_idle_timeout:
-            self._user_idle_controller = UserIdleController(user_idle_timeout=user_idle_timeout)
-            self._user_idle_controller.add_event_handler(
-                "on_user_turn_idle", self._on_user_turn_idle
-            )
+        self._user_idle_controller = UserIdleController(user_idle_timeout=user_idle_timeout)
+        self._user_idle_controller.add_event_handler("on_user_turn_idle", self._on_user_turn_idle)
 
     async def cleanup(self):
         """Clean up processor resources."""
@@ -149,14 +144,11 @@ class UserTurnProcessor(FrameProcessor):
 
         await self._user_turn_controller.process_frame(frame)
 
-        if self._user_idle_controller:
-            await self._user_idle_controller.process_frame(frame)
+        await self._user_idle_controller.process_frame(frame)
 
     async def _start(self, frame: StartFrame):
         await self._user_turn_controller.setup(self.task_manager)
-
-        if self._user_idle_controller:
-            await self._user_idle_controller.setup(self.task_manager)
+        await self._user_idle_controller.setup(self.task_manager)
 
     async def _stop(self, frame: EndFrame):
         await self._cleanup()
@@ -166,9 +158,7 @@ class UserTurnProcessor(FrameProcessor):
 
     async def _cleanup(self):
         await self._user_turn_controller.cleanup()
-
-        if self._user_idle_controller:
-            await self._user_idle_controller.cleanup()
+        await self._user_idle_controller.cleanup()
 
     async def _on_push_frame(
         self, controller, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM
@@ -189,8 +179,7 @@ class UserTurnProcessor(FrameProcessor):
         if params.enable_user_speaking_frames:
             await self.broadcast_frame(UserStartedSpeakingFrame)
 
-        if self._user_idle_controller:
-            await self._user_idle_controller.process_frame(UserStartedSpeakingFrame())
+        await self._user_idle_controller.process_frame(UserStartedSpeakingFrame())
 
         if params.enable_interruptions and self._allow_interruptions:
             await self.push_interruption_task_frame_and_wait()
@@ -208,8 +197,7 @@ class UserTurnProcessor(FrameProcessor):
         if params.enable_user_speaking_frames:
             await self.broadcast_frame(UserStoppedSpeakingFrame)
 
-        if self._user_idle_controller:
-            await self._user_idle_controller.process_frame(UserStoppedSpeakingFrame())
+        await self._user_idle_controller.process_frame(UserStoppedSpeakingFrame())
 
         await self._call_event_handler("on_user_turn_stopped", strategy)
 
diff --git a/tests/test_user_idle_controller.py b/tests/test_user_idle_controller.py
index 6975d6e74..646223d37 100644
--- a/tests/test_user_idle_controller.py
+++ b/tests/test_user_idle_controller.py
@@ -13,6 +13,7 @@ from pipecat.frames.frames import (
     BotStoppedSpeakingFrame,
     FunctionCallResultFrame,
     FunctionCallsStartedFrame,
+    UserIdleTimeoutUpdateFrame,
     UserStartedSpeakingFrame,
 )
 from pipecat.turns.user_idle_controller import UserIdleController
@@ -247,6 +248,76 @@ class TestUserIdleController(unittest.IsolatedAsyncioTestCase):
 
         await controller.cleanup()
 
+    async def test_disabled_by_default(self):
+        """Test that timeout=0 means idle detection is disabled."""
+        controller = UserIdleController()
+        await controller.setup(self.task_manager)
+
+        idle_triggered = False
+
+        @controller.event_handler("on_user_turn_idle")
+        async def on_user_turn_idle(controller):
+            nonlocal idle_triggered
+            idle_triggered = True
+
+        await controller.process_frame(BotStoppedSpeakingFrame())
+        await asyncio.sleep(USER_IDLE_TIMEOUT + 0.1)
+
+        self.assertFalse(idle_triggered)
+
+        await controller.cleanup()
+
+    async def test_enable_via_frame(self):
+        """Test enabling idle detection at runtime via UserIdleTimeoutUpdateFrame."""
+        controller = UserIdleController()
+        await controller.setup(self.task_manager)
+
+        idle_triggered = False
+
+        @controller.event_handler("on_user_turn_idle")
+        async def on_user_turn_idle(controller):
+            nonlocal idle_triggered
+            idle_triggered = True
+
+        # Initially disabled — no idle fires
+        await controller.process_frame(BotStoppedSpeakingFrame())
+        await asyncio.sleep(USER_IDLE_TIMEOUT + 0.1)
+        self.assertFalse(idle_triggered)
+
+        # Enable idle detection
+        await controller.process_frame(UserIdleTimeoutUpdateFrame(timeout=USER_IDLE_TIMEOUT))
+        await controller.process_frame(BotStoppedSpeakingFrame())
+        await asyncio.sleep(USER_IDLE_TIMEOUT + 0.1)
+
+        self.assertTrue(idle_triggered)
+
+        await controller.cleanup()
+
+    async def test_disable_via_frame(self):
+        """Test disabling idle detection at runtime via UserIdleTimeoutUpdateFrame."""
+        controller = UserIdleController(user_idle_timeout=USER_IDLE_TIMEOUT)
+        await controller.setup(self.task_manager)
+
+        idle_triggered = False
+
+        @controller.event_handler("on_user_turn_idle")
+        async def on_user_turn_idle(controller):
+            nonlocal idle_triggered
+            idle_triggered = True
+
+        # Start the timer
+        await controller.process_frame(BotStoppedSpeakingFrame())
+        await asyncio.sleep(USER_IDLE_TIMEOUT * 0.3)
+
+        # Disable — should cancel running timer
+        await controller.process_frame(UserIdleTimeoutUpdateFrame(timeout=0))
+
+        await asyncio.sleep(USER_IDLE_TIMEOUT + 0.1)
+
+        self.assertFalse(idle_triggered)
+
+        await controller.cleanup()
+
 
 if __name__ == "__main__":
     unittest.main()