Make max_context_tokens and max_unsummarized_messages independently optional

Allow either threshold to be set to None to cleanly disable that trigger,
instead of requiring users to set a very large number as a workaround.
At least one of the two must remain set (validated at construction time).
This commit is contained in:
Mark Backman
2026-03-03 20:08:22 -05:00
parent 9186f65952
commit b358657a79
4 changed files with 173 additions and 22 deletions

View File

@@ -211,14 +211,16 @@ class LLMContextSummarizer(BaseObject):
Evaluates whether the current context has reached either the token
threshold or message count threshold that warrants compression.
Either threshold can be ``None`` to disable that check; at least one
must be set (enforced at config construction time).
Returns:
True if all conditions are met:
- ``auto_trigger`` is enabled
- No summarization currently in progress
- AND either:
- Token count exceeds ``max_context_tokens``
- OR message count exceeds ``max_unsummarized_messages`` since last summary
- Token count reaches or exceeds ``max_context_tokens`` (when set)
- OR message count since last summary reaches or exceeds ``max_unsummarized_messages`` (when set)
"""
logger.trace(f"{self}: Checking if context summarization is needed")
@@ -235,19 +237,20 @@ class LLMContextSummarizer(BaseObject):
# Check if we've reached the token limit
token_limit = self._auto_config.max_context_tokens
token_limit_exceeded = total_tokens >= token_limit
token_limit_exceeded = token_limit is not None and total_tokens >= token_limit
# Check if we've exceeded max unsummarized messages
messages_since_summary = len(self._context.messages) - 1
message_threshold = self._auto_config.max_unsummarized_messages
message_threshold_exceeded = (
messages_since_summary >= self._auto_config.max_unsummarized_messages
message_threshold is not None and messages_since_summary >= message_threshold
)
logger.trace(
f"{self}: Context has {num_messages} messages, "
f"~{total_tokens} tokens (limit: {token_limit}), "
f"~{total_tokens} tokens (limit: {token_limit if token_limit is not None else 'disabled'}), "
f"{messages_since_summary} messages since last summary "
f"(message threshold: {self._auto_config.max_unsummarized_messages})"
f"(message threshold: {message_threshold if message_threshold is not None else 'disabled'})"
)
# Trigger if either limit is exceeded
@@ -261,9 +264,7 @@ class LLMContextSummarizer(BaseObject):
if token_limit_exceeded:
reason.append(f"~{total_tokens} tokens (>={token_limit} limit)")
if message_threshold_exceeded:
reason.append(
f"{messages_since_summary} messages (>={self._auto_config.max_unsummarized_messages} threshold)"
)
reason.append(f"{messages_since_summary} messages (>={message_threshold} threshold)")
logger.debug(f"{self}: ✓ Summarization needed - {', '.join(reason)}")
return True

View File

@@ -119,33 +119,45 @@ class LLMAutoContextSummarizationConfig:
that summary is generated. Summarization is triggered when either the
token limit or the unsummarized message count threshold is exceeded.
At least one of ``max_context_tokens`` and ``max_unsummarized_messages``
must be set. Set the other to ``None`` to disable that threshold.
Parameters:
max_context_tokens: Maximum allowed context size in tokens. When this
limit is reached, summarization is triggered to compress the context.
The tokens are calculated using the industry-standard approximation
of 1 token ≈ 4 characters.
of 1 token ≈ 4 characters. Set to ``None`` to disable token-based
triggering.
max_unsummarized_messages: Maximum number of new messages that can
accumulate since the last summary before triggering a new
summarization. This ensures regular compression even if token
limits are not reached.
limits are not reached. Set to ``None`` to disable message-count
triggering.
summary_config: Configuration for summary generation parameters
(prompt, token budget, messages to keep). If not provided, uses
default ``LLMContextSummaryConfig`` values.
"""
max_context_tokens: int = 8000
max_unsummarized_messages: int = 20
max_context_tokens: Optional[int] = 8000
max_unsummarized_messages: Optional[int] = 20
summary_config: LLMContextSummaryConfig = field(default_factory=LLMContextSummaryConfig)
def __post_init__(self):
"""Validate configuration parameters."""
if self.max_context_tokens <= 0:
if self.max_context_tokens is None and self.max_unsummarized_messages is None:
raise ValueError(
"At least one of max_context_tokens and max_unsummarized_messages must be set"
)
if self.max_context_tokens is not None and self.max_context_tokens <= 0:
raise ValueError("max_context_tokens must be positive")
if self.max_unsummarized_messages < 1:
if self.max_unsummarized_messages is not None and self.max_unsummarized_messages < 1:
raise ValueError("max_unsummarized_messages must be at least 1")
# Auto-adjust target_context_tokens if it exceeds max_context_tokens
if self.summary_config.target_context_tokens > self.max_context_tokens:
if (
self.max_context_tokens is not None
and self.summary_config.target_context_tokens > self.max_context_tokens
):
# Use 80% of max_context_tokens as a reasonable default
self.summary_config.target_context_tokens = int(self.max_context_tokens * 0.8)
@@ -154,7 +166,7 @@ class LLMAutoContextSummarizationConfig:
class LLMContextSummarizationConfig:
"""Configuration for context summarization behavior.
.. deprecated::
.. deprecated:: 0.0.104
Use :class:`LLMAutoContextSummarizationConfig` with a nested
:class:`LLMContextSummaryConfig` instead::
@@ -169,15 +181,17 @@ class LLMContextSummarizationConfig:
Parameters:
max_context_tokens: Maximum allowed context size in tokens.
Set to ``None`` to disable token-based triggering.
target_context_tokens: Maximum token size for the generated summary.
max_unsummarized_messages: Maximum new messages before triggering summarization.
Set to ``None`` to disable message-count triggering.
min_messages_after_summary: Number of recent messages to preserve.
summarization_prompt: Custom prompt for summary generation.
"""
max_context_tokens: int = 8000
max_context_tokens: Optional[int] = 8000
target_context_tokens: int = 6000
max_unsummarized_messages: int = 20
max_unsummarized_messages: Optional[int] = 20
min_messages_after_summary: int = 4
summarization_prompt: Optional[str] = None
summary_message_template: str = "Conversation summary: {summary}"
@@ -192,17 +206,24 @@ class LLMContextSummarizationConfig:
DeprecationWarning,
stacklevel=2,
)
if self.max_context_tokens <= 0:
if self.max_context_tokens is None and self.max_unsummarized_messages is None:
raise ValueError(
"At least one of max_context_tokens and max_unsummarized_messages must be set"
)
if self.max_context_tokens is not None and self.max_context_tokens <= 0:
raise ValueError("max_context_tokens must be positive")
if self.target_context_tokens <= 0:
raise ValueError("target_context_tokens must be positive")
# Auto-adjust target_context_tokens if it exceeds max_context_tokens
if self.target_context_tokens > self.max_context_tokens:
if (
self.max_context_tokens is not None
and self.target_context_tokens > self.max_context_tokens
):
# Use 80% of max_context_tokens as a reasonable default
self.target_context_tokens = int(self.max_context_tokens * 0.8)
if self.max_unsummarized_messages < 1:
if self.max_unsummarized_messages is not None and self.max_unsummarized_messages < 1:
raise ValueError("max_unsummarized_messages must be at least 1")
if self.min_messages_after_summary < 0:
raise ValueError("min_messages_after_summary must be positive")

View File

@@ -239,6 +239,43 @@ class TestLLMAutoContextSummarizationConfig(unittest.TestCase):
)
self.assertLessEqual(config.summary_config.target_context_tokens, config.max_context_tokens)
def test_max_context_tokens_none(self):
    """A config with only the message-count threshold set is accepted.

    ``max_context_tokens=None`` must be stored unchanged, and the message
    threshold must keep the value it was constructed with.
    """
    thresholds = {"max_context_tokens": None, "max_unsummarized_messages": 20}
    cfg = LLMAutoContextSummarizationConfig(**thresholds)

    self.assertIsNone(cfg.max_context_tokens)
    self.assertEqual(cfg.max_unsummarized_messages, 20)
def test_max_unsummarized_messages_none(self):
    """A config with only the token threshold set is accepted.

    ``max_unsummarized_messages=None`` must be stored unchanged, and the
    token threshold must keep the value it was constructed with.
    """
    thresholds = {"max_context_tokens": 8000, "max_unsummarized_messages": None}
    cfg = LLMAutoContextSummarizationConfig(**thresholds)

    self.assertEqual(cfg.max_context_tokens, 8000)
    self.assertIsNone(cfg.max_unsummarized_messages)
def test_both_none_raises(self):
    """Disabling both thresholds at once is rejected with ``ValueError``.

    The error text is matched case-insensitively against "at least one".
    """
    both_disabled = {"max_context_tokens": None, "max_unsummarized_messages": None}

    with self.assertRaises(ValueError) as raised:
        LLMAutoContextSummarizationConfig(**both_disabled)

    error_text = str(raised.exception).lower()
    self.assertIn("at least one", error_text)
def test_target_tokens_not_auto_adjusted_when_max_none(self):
    """With no token limit, the summary's target token budget is left as given."""
    summary_settings = LLMContextSummaryConfig(target_context_tokens=9000)
    cfg = LLMAutoContextSummarizationConfig(
        max_context_tokens=None,
        max_unsummarized_messages=10,
        summary_config=summary_settings,
    )

    # With max_context_tokens disabled there is no ceiling to compare the
    # target against, so the configured value must survive construction.
    self.assertEqual(cfg.summary_config.target_context_tokens, 9000)
class TestLLMContextSummarizationConfigDeprecated(unittest.TestCase):
"""Tests for deprecated LLMContextSummarizationConfig."""

View File

@@ -668,6 +668,98 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase):
await summarizer.cleanup()
async def test_token_limit_none_only_message_threshold(self):
"""Test that only message threshold triggers when token limit is None.

Builds a config with ``max_context_tokens=None`` and
``max_unsummarized_messages=5``, then records any frame delivered to the
``on_request_summarization`` handler. Three very large messages must not
produce a request; adding three more messages must.
"""
config = LLMAutoContextSummarizationConfig(
max_context_tokens=None,
max_unsummarized_messages=5,
)
summarizer = LLMContextSummarizer(context=self.context, config=config)
await summarizer.setup(self.task_manager)
# Holds the frame passed to the handler; stays None until the handler runs.
request_frame = None
@summarizer.event_handler("on_request_summarization")
async def on_request_summarization(summarizer, frame):
nonlocal request_frame
request_frame = frame
# Add many tokens but fewer than 5 messages — should NOT trigger
for i in range(3):
self.context.add_message(
{"role": "user", "content": "x" * 10000} # Lots of tokens
)
await summarizer.process_frame(LLMFullResponseStartFrame())
# Token-based triggering is disabled, so the large content alone must not request a summary.
self.assertIsNone(request_frame)
# Cross the message threshold (5 messages since summary = 6 total including system)
for i in range(3):
self.context.add_message({"role": "user", "content": f"Message {i}"})
await summarizer.process_frame(LLMFullResponseStartFrame())
# The handler must have been invoked now that the message threshold is reached.
self.assertIsNotNone(request_frame)
await summarizer.cleanup()
async def test_message_limit_none_only_token_threshold(self):
"""Test that only token threshold triggers when message limit is None.

Builds a config with ``max_context_tokens=100`` and
``max_unsummarized_messages=None``, adds ten messages, and expects the
``on_request_summarization`` handler to have received a frame.
"""
config = LLMAutoContextSummarizationConfig(
max_context_tokens=100, # Very low
max_unsummarized_messages=None,
)
summarizer = LLMContextSummarizer(context=self.context, config=config)
await summarizer.setup(self.task_manager)
# Holds the frame passed to the handler; stays None until the handler runs.
request_frame = None
@summarizer.event_handler("on_request_summarization")
async def on_request_summarization(summarizer, frame):
nonlocal request_frame
request_frame = frame
# Add many messages that exceed the token limit
for i in range(10):
self.context.add_message(
{"role": "user", "content": "This is a test message with enough tokens."}
)
await summarizer.process_frame(LLMFullResponseStartFrame())
# With the message-count check disabled, the token limit alone must request a summary.
self.assertIsNotNone(request_frame)
await summarizer.cleanup()
async def test_message_limit_none_no_trigger_below_tokens(self):
"""Test that many messages don't trigger when message limit is None and tokens are low.

Builds a config with ``max_context_tokens=100000`` and
``max_unsummarized_messages=None``, adds fifty short messages, and expects
the ``on_request_summarization`` handler never to receive a frame.
"""
config = LLMAutoContextSummarizationConfig(
max_context_tokens=100000, # Very high
max_unsummarized_messages=None,
)
summarizer = LLMContextSummarizer(context=self.context, config=config)
await summarizer.setup(self.task_manager)
# Holds the frame passed to the handler; stays None until the handler runs.
request_frame = None
@summarizer.event_handler("on_request_summarization")
async def on_request_summarization(summarizer, frame):
nonlocal request_frame
request_frame = frame
# Add many short messages — would exceed any reasonable message count
# but tokens stay well below the limit
for i in range(50):
self.context.add_message({"role": "user", "content": f"Msg {i}"})
await summarizer.process_frame(LLMFullResponseStartFrame())
# Should NOT trigger because token limit is not exceeded
self.assertIsNone(request_frame)
await summarizer.cleanup()
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
unittest.main()