diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index 1b43f8ba3..6d2ece36e 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -1093,15 +1093,17 @@ class StartInterruptionFrame(InterruptionFrame): @dataclass class UserStartedSpeakingFrame(SystemFrame): - """Frame indicating user has started speaking. + """Frame indicating that the user turn has started. - Emitted by VAD to indicate that a user has started speaking. This can be - used for interruptions or other times when detecting that someone is - speaking is more important than knowing what they're saying (as you will - get with a TranscriptionFrame). + Emitted when the user turn starts, which usually means that some + transcriptions are already available. Parameters: emulated: Whether this event was emulated rather than detected by VAD. + + .. deprecated:: 0.0.99 + This field is deprecated and will be removed in a future version. + """ emulated: bool = False @@ -1109,12 +1111,17 @@ class UserStartedSpeakingFrame(SystemFrame): @dataclass class UserStoppedSpeakingFrame(SystemFrame): - """Frame indicating user has stopped speaking. + """Frame indicating that the user turn has ended. - Emitted by the VAD to indicate that a user stopped speaking. + Emitted when the user turn ends. This usually coincides with the start of + the bot turn. Parameters: emulated: Whether this event was emulated rather than detected by VAD. + + .. deprecated:: 0.0.99 + This field is deprecated and will be removed in a future version. + """ emulated: bool = False diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index 56b5e9e22..9c687492c 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -407,7 +407,7 @@ class LLMUserAggregator(LLMContextAggregator): if self._params.enable_user_speaking_frames: logger.debug(f"User started speaking (user turn start strategy: {strategy})") # TODO(aleix): These frames should really come from the top of the pipeline. - await self.broadcast_frame(UserStartedSpeakingFrame, emulated=strategy is None) + await self.broadcast_frame(UserStartedSpeakingFrame) await self.broadcast_frame(InterruptionFrame) async def _trigger_bot_turn_start(self, strategy: BaseBotTurnStartStrategy): @@ -419,7 +419,7 @@ class LLMUserAggregator(LLMContextAggregator): if self._params.enable_user_speaking_frames: logger.debug(f"User stopped speaking (bot turn start strategy: {strategy})") # TODO(aleix): This frame should really come from the top of the pipeline. - await self.broadcast_frame(UserStoppedSpeakingFrame, emulated=strategy is None) + await self.broadcast_frame(UserStoppedSpeakingFrame) # Always push context frame. await self.push_aggregation()