LLMUserAggregator: allow external user started/stopped speaking frames

This commit is contained in:
Aleix Conchillo Flaqué
2025-12-11 10:29:56 -08:00
parent 962eb73cc4
commit d33c72a8b0

View File

@@ -15,6 +15,7 @@ import asyncio
import json
import warnings
from abc import abstractmethod
from dataclasses import dataclass
from typing import Any, Dict, List, Literal, Optional, Set
from loguru import logger
@@ -58,7 +59,6 @@ from pipecat.processors.aggregators.llm_context import (
)
from pipecat.processors.aggregators.llm_response import (
LLMAssistantAggregatorParams,
LLMUserAggregatorParams,
)
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.turns.bot.base_bot_turn_start_strategy import BaseBotTurnStartStrategy
@@ -67,6 +67,21 @@ from pipecat.utils.string import TextPartForConcatenation, concatenate_aggregate
from pipecat.utils.time import time_now_iso8601
@dataclass
class LLMUserAggregatorParams:
    """Parameters for configuring LLM user aggregation behavior.

    Parameters:
        enable_user_speaking_frames: If True, the aggregator will emit frames
            indicating when the user starts and stops speaking, as well as
            interruption frames. This is enabled by default, but you may want
            to disable it if another component (e.g., an STT service) is already
            generating these frames. Disabling it only suppresses those
            frames; the aggregated context is still pushed when the bot turn
            starts.
    """

    # When False, the aggregator skips pushing the user started/stopped
    # speaking frames and the interruption frame.
    enable_user_speaking_frames: bool = True
class LLMContextAggregator(FrameProcessor):
"""Base LLM aggregator that uses an LLMContext for conversation storage.
@@ -370,15 +385,15 @@ class LLMUserAggregator(LLMContextAggregator):
self._user_speaking = True
logger.debug(f"User started speaking (user turn start strategy: {strategy})")
# Reset all user turn start strategies to start fresh.
if self.turn_start_strategies:
for s in self.turn_start_strategies.user:
await s.reset()
await self.push_frame(UserStartedSpeakingFrame())
await self.push_frame(InterruptionFrame())
if self._params.enable_user_speaking_frames:
logger.debug(f"User started speaking (user turn start strategy: {strategy})")
await self.push_frame(UserStartedSpeakingFrame())
await self.push_frame(InterruptionFrame())
async def _trigger_bot_turn_start(self, strategy: BaseBotTurnStartStrategy):
if not self._user_speaking:
@@ -386,14 +401,16 @@ class LLMUserAggregator(LLMContextAggregator):
self._user_speaking = False
logger.debug(f"User stopped speaking (bot turn start strategy: {strategy})")
# Reset all bot turn start strategies to start fresh.
if self.turn_start_strategies:
for s in self.turn_start_strategies.bot:
await s.reset()
await self.push_frame(UserStoppedSpeakingFrame())
if self._params.enable_user_speaking_frames:
logger.debug(f"User stopped speaking (bot turn start strategy: {strategy})")
await self.push_frame(UserStoppedSpeakingFrame())
# Always push context frame.
await self.push_aggregation()