From d33c72a8b0bd7b874f70bcd5099f6f3aa20193cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= <aleix@daily.co>
Date: Thu, 11 Dec 2025 10:29:56 -0800
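Subject: [PATCH] LLMUserAggregator: allow external user started/stopped
 speaking frames

Define LLMUserAggregatorParams in llm_response_universal.py (instead of
importing it from llm_response) with a new enable_user_speaking_frames
option, enabled by default. When it is disabled, the aggregator no longer
pushes UserStartedSpeakingFrame, InterruptionFrame or
UserStoppedSpeakingFrame (nor logs the matching debug messages), so another
component can provide them. Turn start strategies are still reset and the
aggregated context is still pushed.
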
---
 .../aggregators/llm_response_universal.py     | 33 ++++++++++++++-----
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py
index df3f707c3..a007c5ebd 100644
--- a/src/pipecat/processors/aggregators/llm_response_universal.py
+++ b/src/pipecat/processors/aggregators/llm_response_universal.py
@@ -15,6 +15,7 @@ import asyncio
 import json
 import warnings
 from abc import abstractmethod
+from dataclasses import dataclass
 from typing import Any, Dict, List, Literal, Optional, Set
 
 from loguru import logger
@@ -58,7 +59,6 @@ from pipecat.processors.aggregators.llm_context import (
 )
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
-    LLMUserAggregatorParams,
 )
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 from pipecat.turns.bot.base_bot_turn_start_strategy import BaseBotTurnStartStrategy
@@ -67,6 +67,21 @@ from pipecat.utils.string import TextPartForConcatenation, concatenate_aggregate
 from pipecat.utils.time import time_now_iso8601
 
 
+@dataclass
+class LLMUserAggregatorParams:
+    """Parameters for configuring LLM user aggregation behavior.
+
+    Parameters:
+        enable_user_speaking_frames: If True (the default), the aggregator
+            pushes UserStartedSpeakingFrame and InterruptionFrame when a user
+            turn starts and UserStoppedSpeakingFrame when it ends. Set it to
+            False if another component (e.g., an STT service) already emits
+            these frames; the aggregated context is still pushed either way.
+    """
+
+    enable_user_speaking_frames: bool = True
+
+
 class LLMContextAggregator(FrameProcessor):
     """Base LLM aggregator that uses an LLMContext for conversation storage.
 
@@ -370,15 +385,15 @@ class LLMUserAggregator(LLMContextAggregator):
 
         self._user_speaking = True
 
-        logger.debug(f"User started speaking (user turn start strategy: {strategy})")
-
         # Reset all user turn start strategies to start fresh.
         if self.turn_start_strategies:
             for s in self.turn_start_strategies.user:
                 await s.reset()
 
-        await self.push_frame(UserStartedSpeakingFrame())
-        await self.push_frame(InterruptionFrame())
+        if self._params.enable_user_speaking_frames:
+            logger.debug(f"User started speaking (user turn start strategy: {strategy})")
+            await self.push_frame(UserStartedSpeakingFrame())
+            await self.push_frame(InterruptionFrame())
 
     async def _trigger_bot_turn_start(self, strategy: BaseBotTurnStartStrategy):
         if not self._user_speaking:
@@ -386,14 +401,16 @@ class LLMUserAggregator(LLMContextAggregator):
 
         self._user_speaking = False
 
-        logger.debug(f"User stopped speaking (bot turn start strategy: {strategy})")
-
         # Reset all bot turn start strategies to start fresh.
         if self.turn_start_strategies:
             for s in self.turn_start_strategies.bot:
                 await s.reset()
 
-        await self.push_frame(UserStoppedSpeakingFrame())
+        if self._params.enable_user_speaking_frames:
+            logger.debug(f"User stopped speaking (bot turn start strategy: {strategy})")
+            await self.push_frame(UserStoppedSpeakingFrame())
+
+        # Always push context frame.
         await self.push_aggregation()