diff --git a/src/pipecat/processors/aggregators/llm_context_summarizer.py b/src/pipecat/processors/aggregators/llm_context_summarizer.py index 54879a8bb..4e55ffaf1 100644 --- a/src/pipecat/processors/aggregators/llm_context_summarizer.py +++ b/src/pipecat/processors/aggregators/llm_context_summarizer.py @@ -211,14 +211,16 @@ class LLMContextSummarizer(BaseObject): Evaluates whether the current context has reached either the token threshold or message count threshold that warrants compression. + Either threshold can be ``None`` to disable that check; at least one + must be set (enforced at config construction time). Returns: True if all conditions are met: - ``auto_trigger`` is enabled - No summarization currently in progress - AND either: - - Token count exceeds ``max_context_tokens`` - - OR message count exceeds ``max_unsummarized_messages`` since last summary + - Token count reaches ``max_context_tokens`` (when set) + - OR message count reaches ``max_unsummarized_messages`` since last summary (when set) """ logger.trace(f"{self}: Checking if context summarization is needed") @@ -235,19 +237,20 @@ class LLMContextSummarizer(BaseObject): # Check if we've reached the token limit token_limit = self._auto_config.max_context_tokens - token_limit_exceeded = total_tokens >= token_limit + token_limit_exceeded = token_limit is not None and total_tokens >= token_limit # Check if we've exceeded max unsummarized messages messages_since_summary = len(self._context.messages) - 1 + message_threshold = self._auto_config.max_unsummarized_messages message_threshold_exceeded = ( - messages_since_summary >= self._auto_config.max_unsummarized_messages + message_threshold is not None and messages_since_summary >= message_threshold ) logger.trace( f"{self}: Context has {num_messages} messages, " - f"~{total_tokens} tokens (limit: {token_limit}), " + f"~{total_tokens} tokens (limit: {token_limit if token_limit is not None else 'disabled'}), " f"{messages_since_summary} messages since 
last summary " - f"(message threshold: {self._auto_config.max_unsummarized_messages})" + f"(message threshold: {message_threshold if message_threshold is not None else 'disabled'})" ) # Trigger if either limit is exceeded @@ -261,9 +264,7 @@ class LLMContextSummarizer(BaseObject): if token_limit_exceeded: reason.append(f"~{total_tokens} tokens (>={token_limit} limit)") if message_threshold_exceeded: - reason.append( - f"{messages_since_summary} messages (>={self._auto_config.max_unsummarized_messages} threshold)" - ) + reason.append(f"{messages_since_summary} messages (>={message_threshold} threshold)") logger.debug(f"{self}: ✓ Summarization needed - {', '.join(reason)}") return True diff --git a/src/pipecat/utils/context/llm_context_summarization.py b/src/pipecat/utils/context/llm_context_summarization.py index e68311942..707d0c32d 100644 --- a/src/pipecat/utils/context/llm_context_summarization.py +++ b/src/pipecat/utils/context/llm_context_summarization.py @@ -119,33 +119,45 @@ class LLMAutoContextSummarizationConfig: that summary is generated. Summarization is triggered when either the token limit or the unsummarized message count threshold is exceeded. + At least one of ``max_context_tokens`` and ``max_unsummarized_messages`` + must be set. Set the other to ``None`` to disable that threshold. + Parameters: max_context_tokens: Maximum allowed context size in tokens. When this limit is reached, summarization is triggered to compress the context. The tokens are calculated using the industry-standard approximation - of 1 token ≈ 4 characters. + of 1 token ≈ 4 characters. Set to ``None`` to disable token-based + triggering. max_unsummarized_messages: Maximum number of new messages that can accumulate since the last summary before triggering a new summarization. This ensures regular compression even if token - limits are not reached. + limits are not reached. Set to ``None`` to disable message-count + triggering. 
summary_config: Configuration for summary generation parameters (prompt, token budget, messages to keep). If not provided, uses default ``LLMContextSummaryConfig`` values. """ - max_context_tokens: int = 8000 - max_unsummarized_messages: int = 20 + max_context_tokens: Optional[int] = 8000 + max_unsummarized_messages: Optional[int] = 20 summary_config: LLMContextSummaryConfig = field(default_factory=LLMContextSummaryConfig) def __post_init__(self): """Validate configuration parameters.""" - if self.max_context_tokens <= 0: + if self.max_context_tokens is None and self.max_unsummarized_messages is None: + raise ValueError( + "At least one of max_context_tokens and max_unsummarized_messages must be set" + ) + if self.max_context_tokens is not None and self.max_context_tokens <= 0: raise ValueError("max_context_tokens must be positive") - if self.max_unsummarized_messages < 1: + if self.max_unsummarized_messages is not None and self.max_unsummarized_messages < 1: raise ValueError("max_unsummarized_messages must be at least 1") # Auto-adjust target_context_tokens if it exceeds max_context_tokens - if self.summary_config.target_context_tokens > self.max_context_tokens: + if ( + self.max_context_tokens is not None + and self.summary_config.target_context_tokens > self.max_context_tokens + ): # Use 80% of max_context_tokens as a reasonable default self.summary_config.target_context_tokens = int(self.max_context_tokens * 0.8) @@ -154,7 +166,7 @@ class LLMAutoContextSummarizationConfig: class LLMContextSummarizationConfig: """Configuration for context summarization behavior. - .. deprecated:: + .. deprecated:: 0.0.104 Use :class:`LLMAutoContextSummarizationConfig` with a nested :class:`LLMContextSummaryConfig` instead:: @@ -169,15 +181,17 @@ class LLMContextSummarizationConfig: Parameters: max_context_tokens: Maximum allowed context size in tokens. + Set to ``None`` to disable token-based triggering. target_context_tokens: Maximum token size for the generated summary. 
max_unsummarized_messages: Maximum new messages before triggering summarization. + Set to ``None`` to disable message-count triggering. min_messages_after_summary: Number of recent messages to preserve. summarization_prompt: Custom prompt for summary generation. """ - max_context_tokens: int = 8000 + max_context_tokens: Optional[int] = 8000 target_context_tokens: int = 6000 - max_unsummarized_messages: int = 20 + max_unsummarized_messages: Optional[int] = 20 min_messages_after_summary: int = 4 summarization_prompt: Optional[str] = None summary_message_template: str = "Conversation summary: {summary}" @@ -192,17 +206,24 @@ class LLMContextSummarizationConfig: DeprecationWarning, stacklevel=2, ) - if self.max_context_tokens <= 0: + if self.max_context_tokens is None and self.max_unsummarized_messages is None: + raise ValueError( + "At least one of max_context_tokens and max_unsummarized_messages must be set" + ) + if self.max_context_tokens is not None and self.max_context_tokens <= 0: raise ValueError("max_context_tokens must be positive") if self.target_context_tokens <= 0: raise ValueError("target_context_tokens must be positive") # Auto-adjust target_context_tokens if it exceeds max_context_tokens - if self.target_context_tokens > self.max_context_tokens: + if ( + self.max_context_tokens is not None + and self.target_context_tokens > self.max_context_tokens + ): # Use 80% of max_context_tokens as a reasonable default self.target_context_tokens = int(self.max_context_tokens * 0.8) - if self.max_unsummarized_messages < 1: + if self.max_unsummarized_messages is not None and self.max_unsummarized_messages < 1: raise ValueError("max_unsummarized_messages must be at least 1") if self.min_messages_after_summary < 0: raise ValueError("min_messages_after_summary must be positive") diff --git a/tests/test_context_summarization.py b/tests/test_context_summarization.py index 10223a606..e2666e7fa 100644 --- a/tests/test_context_summarization.py +++ 
b/tests/test_context_summarization.py @@ -239,6 +239,43 @@ class TestLLMAutoContextSummarizationConfig(unittest.TestCase): ) self.assertLessEqual(config.summary_config.target_context_tokens, config.max_context_tokens) + def test_max_context_tokens_none(self): + """Test that max_context_tokens can be None when max_unsummarized_messages is set.""" + config = LLMAutoContextSummarizationConfig( + max_context_tokens=None, + max_unsummarized_messages=20, + ) + self.assertIsNone(config.max_context_tokens) + self.assertEqual(config.max_unsummarized_messages, 20) + + def test_max_unsummarized_messages_none(self): + """Test that max_unsummarized_messages can be None when max_context_tokens is set.""" + config = LLMAutoContextSummarizationConfig( + max_context_tokens=8000, + max_unsummarized_messages=None, + ) + self.assertEqual(config.max_context_tokens, 8000) + self.assertIsNone(config.max_unsummarized_messages) + + def test_both_none_raises(self): + """Test that setting both thresholds to None raises ValueError.""" + with self.assertRaises(ValueError) as cm: + LLMAutoContextSummarizationConfig( + max_context_tokens=None, + max_unsummarized_messages=None, + ) + self.assertIn("at least one", str(cm.exception).lower()) + + def test_target_tokens_not_auto_adjusted_when_max_none(self): + """Test that target_context_tokens is not auto-adjusted when max_context_tokens is None.""" + config = LLMAutoContextSummarizationConfig( + max_context_tokens=None, + max_unsummarized_messages=10, + summary_config=LLMContextSummaryConfig(target_context_tokens=9000), + ) + # target_context_tokens should remain unchanged since there's no max to compare against + self.assertEqual(config.summary_config.target_context_tokens, 9000) + class TestLLMContextSummarizationConfigDeprecated(unittest.TestCase): """Tests for deprecated LLMContextSummarizationConfig.""" diff --git a/tests/test_llm_context_summarizer.py b/tests/test_llm_context_summarizer.py index 7e8b326f9..bbe8648ef 100644 --- 
a/tests/test_llm_context_summarizer.py +++ b/tests/test_llm_context_summarizer.py @@ -668,6 +668,98 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): await summarizer.cleanup() + async def test_token_limit_none_only_message_threshold(self): + """Test that only message threshold triggers when token limit is None.""" + config = LLMAutoContextSummarizationConfig( + max_context_tokens=None, + max_unsummarized_messages=5, + ) + + summarizer = LLMContextSummarizer(context=self.context, config=config) + await summarizer.setup(self.task_manager) + + request_frame = None + + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal request_frame + request_frame = frame + + # Add many tokens but fewer than 5 messages — should NOT trigger + for i in range(3): + self.context.add_message( + {"role": "user", "content": "x" * 10000} # Lots of tokens + ) + + await summarizer.process_frame(LLMFullResponseStartFrame()) + self.assertIsNone(request_frame) + + # Cross the message threshold (5 messages since summary = 6 total including system) + for i in range(3): + self.context.add_message({"role": "user", "content": f"Message {i}"}) + + await summarizer.process_frame(LLMFullResponseStartFrame()) + self.assertIsNotNone(request_frame) + + await summarizer.cleanup() + + async def test_message_limit_none_only_token_threshold(self): + """Test that only token threshold triggers when message limit is None.""" + config = LLMAutoContextSummarizationConfig( + max_context_tokens=100, # Very low + max_unsummarized_messages=None, + ) + + summarizer = LLMContextSummarizer(context=self.context, config=config) + await summarizer.setup(self.task_manager) + + request_frame = None + + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal request_frame + request_frame = frame + + # Add many messages that exceed the token limit + for i in range(10): + 
self.context.add_message( + {"role": "user", "content": "This is a test message with enough tokens."} + ) + + await summarizer.process_frame(LLMFullResponseStartFrame()) + self.assertIsNotNone(request_frame) + + await summarizer.cleanup() + + async def test_message_limit_none_no_trigger_below_tokens(self): + """Test that many messages don't trigger when message limit is None and tokens are low.""" + config = LLMAutoContextSummarizationConfig( + max_context_tokens=100000, # Very high + max_unsummarized_messages=None, + ) + + summarizer = LLMContextSummarizer(context=self.context, config=config) + await summarizer.setup(self.task_manager) + + request_frame = None + + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal request_frame + request_frame = frame + + # Add many short messages — would exceed any reasonable message count + # but tokens stay well below the limit + for i in range(50): + self.context.add_message({"role": "user", "content": f"Msg {i}"}) + + await summarizer.process_frame(LLMFullResponseStartFrame()) + + # Should NOT trigger because token limit is not exceeded + self.assertIsNone(request_frame) + + await summarizer.cleanup() + if __name__ == "__main__": unittest.main()