LLMUserAggregator: allow external user started/stopped speaking frames

2025-12-11 10:29:56 -08:00
parent 962eb73cc4
commit d33c72a8b0
1 changed files with 25 additions and 8 deletions
--- a/src/pipecat/processors/aggregators/llm_response_universal.py
+++ b/src/pipecat/processors/aggregators/llm_response_universal.py
@@ -15,6 +15,7 @@ import asyncio
 import json
 import warnings
 from abc import abstractmethod
+from dataclasses import dataclass
 from typing import Any, Dict, List, Literal, Optional, Set

 from loguru import logger
@@ -58,7 +59,6 @@ from pipecat.processors.aggregators.llm_context import (
 )
 from pipecat.processors.aggregators.llm_response import (
    LLMAssistantAggregatorParams,
-    LLMUserAggregatorParams,
 )
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 from pipecat.turns.bot.base_bot_turn_start_strategy import BaseBotTurnStartStrategy
@@ -67,6 +67,21 @@ from pipecat.utils.string import TextPartForConcatenation, concatenate_aggregate
 from pipecat.utils.time import time_now_iso8601


+@dataclass
+class LLMUserAggregatorParams:
+    """Parameters for configuring LLM user aggregation behavior.
+
+    Parameters:
+        enable_user_speaking_frames: If True, the aggregator will emit frames
+            indicating when the user starts and stops speaking, as well as
+            interruption frames. This is enabled by default, but you may want
+            to disable it if another component (e.g., an STT service) is already
+            generating these frames.
+    """
+
+    enable_user_speaking_frames: bool = True
+
+
 class LLMContextAggregator(FrameProcessor):
    """Base LLM aggregator that uses an LLMContext for conversation storage.

@@ -370,15 +385,15 @@ class LLMUserAggregator(LLMContextAggregator):

        self._user_speaking = True

-        logger.debug(f"User started speaking (user turn start strategy: {strategy})")
-
        # Reset all user turn start strategies to start fresh.
        if self.turn_start_strategies:
            for s in self.turn_start_strategies.user:
                await s.reset()

-        await self.push_frame(UserStartedSpeakingFrame())
-        await self.push_frame(InterruptionFrame())
+        if self._params.enable_user_speaking_frames:
+            logger.debug(f"User started speaking (user turn start strategy: {strategy})")
+            await self.push_frame(UserStartedSpeakingFrame())
+            await self.push_frame(InterruptionFrame())

    async def _trigger_bot_turn_start(self, strategy: BaseBotTurnStartStrategy):
        if not self._user_speaking:
@@ -386,14 +401,16 @@ class LLMUserAggregator(LLMContextAggregator):

        self._user_speaking = False

-        logger.debug(f"User stopped speaking (bot turn start strategy: {strategy})")
-
        # Reset all bot turn start strategies to start fresh.
        if self.turn_start_strategies:
            for s in self.turn_start_strategies.bot:
                await s.reset()

-        await self.push_frame(UserStoppedSpeakingFrame())
+        if self._params.enable_user_speaking_frames:
+            logger.debug(f"User stopped speaking (bot turn start strategy: {strategy})")
+            await self.push_frame(UserStoppedSpeakingFrame())
+
+        # Always push context frame.
        await self.push_aggregation()