Make max_context_tokens and max_unsummarized_messages independently optional
Allow either threshold to be set to ``None`` to cleanly disable that trigger, instead of requiring users to set a very large number as a workaround. At least one of the two must remain set: setting both to ``None`` raises a ``ValueError`` at config construction time.
This commit is contained in:
@@ -211,14 +211,16 @@ class LLMContextSummarizer(BaseObject):
|
||||
|
||||
Evaluates whether the current context has reached either the token
|
||||
threshold or message count threshold that warrants compression.
|
||||
Either threshold can be ``None`` to disable that check; at least one
|
||||
must be set (enforced at config construction time).
|
||||
|
||||
Returns:
|
||||
True if all conditions are met:
|
||||
- ``auto_trigger`` is enabled
|
||||
- No summarization currently in progress
|
||||
- AND either:
|
||||
- Token count exceeds ``max_context_tokens``
|
||||
- OR message count exceeds ``max_unsummarized_messages`` since last summary
|
||||
- Token count exceeds ``max_context_tokens`` (when set)
|
||||
- OR message count exceeds ``max_unsummarized_messages`` since last summary (when set)
|
||||
"""
|
||||
logger.trace(f"{self}: Checking if context summarization is needed")
|
||||
|
||||
@@ -235,19 +237,20 @@ class LLMContextSummarizer(BaseObject):
|
||||
|
||||
# Check if we've reached the token limit
|
||||
token_limit = self._auto_config.max_context_tokens
|
||||
token_limit_exceeded = total_tokens >= token_limit
|
||||
token_limit_exceeded = token_limit is not None and total_tokens >= token_limit
|
||||
|
||||
# Check if we've exceeded max unsummarized messages
|
||||
messages_since_summary = len(self._context.messages) - 1
|
||||
message_threshold = self._auto_config.max_unsummarized_messages
|
||||
message_threshold_exceeded = (
|
||||
messages_since_summary >= self._auto_config.max_unsummarized_messages
|
||||
message_threshold is not None and messages_since_summary >= message_threshold
|
||||
)
|
||||
|
||||
logger.trace(
|
||||
f"{self}: Context has {num_messages} messages, "
|
||||
f"~{total_tokens} tokens (limit: {token_limit}), "
|
||||
f"~{total_tokens} tokens (limit: {token_limit if token_limit is not None else 'disabled'}), "
|
||||
f"{messages_since_summary} messages since last summary "
|
||||
f"(message threshold: {self._auto_config.max_unsummarized_messages})"
|
||||
f"(message threshold: {message_threshold if message_threshold is not None else 'disabled'})"
|
||||
)
|
||||
|
||||
# Trigger if either limit is exceeded
|
||||
@@ -261,9 +264,7 @@ class LLMContextSummarizer(BaseObject):
|
||||
if token_limit_exceeded:
|
||||
reason.append(f"~{total_tokens} tokens (>={token_limit} limit)")
|
||||
if message_threshold_exceeded:
|
||||
reason.append(
|
||||
f"{messages_since_summary} messages (>={self._auto_config.max_unsummarized_messages} threshold)"
|
||||
)
|
||||
reason.append(f"{messages_since_summary} messages (>={message_threshold} threshold)")
|
||||
|
||||
logger.debug(f"{self}: ✓ Summarization needed - {', '.join(reason)}")
|
||||
return True
|
||||
|
||||
@@ -119,33 +119,45 @@ class LLMAutoContextSummarizationConfig:
|
||||
that summary is generated. Summarization is triggered when either the
|
||||
token limit or the unsummarized message count threshold is exceeded.
|
||||
|
||||
At least one of ``max_context_tokens`` and ``max_unsummarized_messages``
|
||||
must be set. Set the other to ``None`` to disable that threshold.
|
||||
|
||||
Parameters:
|
||||
max_context_tokens: Maximum allowed context size in tokens. When this
|
||||
limit is reached, summarization is triggered to compress the context.
|
||||
The tokens are calculated using the industry-standard approximation
|
||||
of 1 token ≈ 4 characters.
|
||||
of 1 token ≈ 4 characters. Set to ``None`` to disable token-based
|
||||
triggering.
|
||||
max_unsummarized_messages: Maximum number of new messages that can
|
||||
accumulate since the last summary before triggering a new
|
||||
summarization. This ensures regular compression even if token
|
||||
limits are not reached.
|
||||
limits are not reached. Set to ``None`` to disable message-count
|
||||
triggering.
|
||||
summary_config: Configuration for summary generation parameters
|
||||
(prompt, token budget, messages to keep). If not provided, uses
|
||||
default ``LLMContextSummaryConfig`` values.
|
||||
"""
|
||||
|
||||
max_context_tokens: int = 8000
|
||||
max_unsummarized_messages: int = 20
|
||||
max_context_tokens: Optional[int] = 8000
|
||||
max_unsummarized_messages: Optional[int] = 20
|
||||
summary_config: LLMContextSummaryConfig = field(default_factory=LLMContextSummaryConfig)
|
||||
|
||||
def __post_init__(self):
|
||||
"""Validate configuration parameters."""
|
||||
if self.max_context_tokens <= 0:
|
||||
if self.max_context_tokens is None and self.max_unsummarized_messages is None:
|
||||
raise ValueError(
|
||||
"At least one of max_context_tokens and max_unsummarized_messages must be set"
|
||||
)
|
||||
if self.max_context_tokens is not None and self.max_context_tokens <= 0:
|
||||
raise ValueError("max_context_tokens must be positive")
|
||||
if self.max_unsummarized_messages < 1:
|
||||
if self.max_unsummarized_messages is not None and self.max_unsummarized_messages < 1:
|
||||
raise ValueError("max_unsummarized_messages must be at least 1")
|
||||
|
||||
# Auto-adjust target_context_tokens if it exceeds max_context_tokens
|
||||
if self.summary_config.target_context_tokens > self.max_context_tokens:
|
||||
if (
|
||||
self.max_context_tokens is not None
|
||||
and self.summary_config.target_context_tokens > self.max_context_tokens
|
||||
):
|
||||
# Use 80% of max_context_tokens as a reasonable default
|
||||
self.summary_config.target_context_tokens = int(self.max_context_tokens * 0.8)
|
||||
|
||||
@@ -154,7 +166,7 @@ class LLMAutoContextSummarizationConfig:
|
||||
class LLMContextSummarizationConfig:
|
||||
"""Configuration for context summarization behavior.
|
||||
|
||||
.. deprecated::
|
||||
.. deprecated:: 0.0.104
|
||||
Use :class:`LLMAutoContextSummarizationConfig` with a nested
|
||||
:class:`LLMContextSummaryConfig` instead::
|
||||
|
||||
@@ -169,15 +181,17 @@ class LLMContextSummarizationConfig:
|
||||
|
||||
Parameters:
|
||||
max_context_tokens: Maximum allowed context size in tokens.
|
||||
Set to ``None`` to disable token-based triggering.
|
||||
target_context_tokens: Maximum token size for the generated summary.
|
||||
max_unsummarized_messages: Maximum new messages before triggering summarization.
|
||||
Set to ``None`` to disable message-count triggering.
|
||||
min_messages_after_summary: Number of recent messages to preserve.
|
||||
summarization_prompt: Custom prompt for summary generation.
|
||||
"""
|
||||
|
||||
max_context_tokens: int = 8000
|
||||
max_context_tokens: Optional[int] = 8000
|
||||
target_context_tokens: int = 6000
|
||||
max_unsummarized_messages: int = 20
|
||||
max_unsummarized_messages: Optional[int] = 20
|
||||
min_messages_after_summary: int = 4
|
||||
summarization_prompt: Optional[str] = None
|
||||
summary_message_template: str = "Conversation summary: {summary}"
|
||||
@@ -192,17 +206,24 @@ class LLMContextSummarizationConfig:
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
if self.max_context_tokens <= 0:
|
||||
if self.max_context_tokens is None and self.max_unsummarized_messages is None:
|
||||
raise ValueError(
|
||||
"At least one of max_context_tokens and max_unsummarized_messages must be set"
|
||||
)
|
||||
if self.max_context_tokens is not None and self.max_context_tokens <= 0:
|
||||
raise ValueError("max_context_tokens must be positive")
|
||||
if self.target_context_tokens <= 0:
|
||||
raise ValueError("target_context_tokens must be positive")
|
||||
|
||||
# Auto-adjust target_context_tokens if it exceeds max_context_tokens
|
||||
if self.target_context_tokens > self.max_context_tokens:
|
||||
if (
|
||||
self.max_context_tokens is not None
|
||||
and self.target_context_tokens > self.max_context_tokens
|
||||
):
|
||||
# Use 80% of max_context_tokens as a reasonable default
|
||||
self.target_context_tokens = int(self.max_context_tokens * 0.8)
|
||||
|
||||
if self.max_unsummarized_messages < 1:
|
||||
if self.max_unsummarized_messages is not None and self.max_unsummarized_messages < 1:
|
||||
raise ValueError("max_unsummarized_messages must be at least 1")
|
||||
if self.min_messages_after_summary < 0:
|
||||
raise ValueError("min_messages_after_summary must be positive")
|
||||
|
||||
@@ -239,6 +239,43 @@ class TestLLMAutoContextSummarizationConfig(unittest.TestCase):
|
||||
)
|
||||
self.assertLessEqual(config.summary_config.target_context_tokens, config.max_context_tokens)
|
||||
|
||||
def test_max_context_tokens_none(self):
|
||||
"""Test that max_context_tokens can be None when max_unsummarized_messages is set."""
|
||||
config = LLMAutoContextSummarizationConfig(
|
||||
max_context_tokens=None,
|
||||
max_unsummarized_messages=20,
|
||||
)
|
||||
self.assertIsNone(config.max_context_tokens)
|
||||
self.assertEqual(config.max_unsummarized_messages, 20)
|
||||
|
||||
def test_max_unsummarized_messages_none(self):
|
||||
"""Test that max_unsummarized_messages can be None when max_context_tokens is set."""
|
||||
config = LLMAutoContextSummarizationConfig(
|
||||
max_context_tokens=8000,
|
||||
max_unsummarized_messages=None,
|
||||
)
|
||||
self.assertEqual(config.max_context_tokens, 8000)
|
||||
self.assertIsNone(config.max_unsummarized_messages)
|
||||
|
||||
def test_both_none_raises(self):
|
||||
"""Test that setting both thresholds to None raises ValueError."""
|
||||
with self.assertRaises(ValueError) as cm:
|
||||
LLMAutoContextSummarizationConfig(
|
||||
max_context_tokens=None,
|
||||
max_unsummarized_messages=None,
|
||||
)
|
||||
self.assertIn("at least one", str(cm.exception).lower())
|
||||
|
||||
def test_target_tokens_not_auto_adjusted_when_max_none(self):
|
||||
"""Test that target_context_tokens is not auto-adjusted when max_context_tokens is None."""
|
||||
config = LLMAutoContextSummarizationConfig(
|
||||
max_context_tokens=None,
|
||||
max_unsummarized_messages=10,
|
||||
summary_config=LLMContextSummaryConfig(target_context_tokens=9000),
|
||||
)
|
||||
# target_context_tokens should remain unchanged since there's no max to compare against
|
||||
self.assertEqual(config.summary_config.target_context_tokens, 9000)
|
||||
|
||||
|
||||
class TestLLMContextSummarizationConfigDeprecated(unittest.TestCase):
|
||||
"""Tests for deprecated LLMContextSummarizationConfig."""
|
||||
|
||||
@@ -668,6 +668,98 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase):
|
||||
|
||||
await summarizer.cleanup()
|
||||
|
||||
async def test_token_limit_none_only_message_threshold(self):
|
||||
"""Test that only message threshold triggers when token limit is None."""
|
||||
config = LLMAutoContextSummarizationConfig(
|
||||
max_context_tokens=None,
|
||||
max_unsummarized_messages=5,
|
||||
)
|
||||
|
||||
summarizer = LLMContextSummarizer(context=self.context, config=config)
|
||||
await summarizer.setup(self.task_manager)
|
||||
|
||||
request_frame = None
|
||||
|
||||
@summarizer.event_handler("on_request_summarization")
|
||||
async def on_request_summarization(summarizer, frame):
|
||||
nonlocal request_frame
|
||||
request_frame = frame
|
||||
|
||||
# Add many tokens but fewer than 5 messages — should NOT trigger
|
||||
for i in range(3):
|
||||
self.context.add_message(
|
||||
{"role": "user", "content": "x" * 10000} # Lots of tokens
|
||||
)
|
||||
|
||||
await summarizer.process_frame(LLMFullResponseStartFrame())
|
||||
self.assertIsNone(request_frame)
|
||||
|
||||
# Cross the message threshold (5 messages since summary = 6 total including system)
|
||||
for i in range(3):
|
||||
self.context.add_message({"role": "user", "content": f"Message {i}"})
|
||||
|
||||
await summarizer.process_frame(LLMFullResponseStartFrame())
|
||||
self.assertIsNotNone(request_frame)
|
||||
|
||||
await summarizer.cleanup()
|
||||
|
||||
async def test_message_limit_none_only_token_threshold(self):
|
||||
"""Test that only token threshold triggers when message limit is None."""
|
||||
config = LLMAutoContextSummarizationConfig(
|
||||
max_context_tokens=100, # Very low
|
||||
max_unsummarized_messages=None,
|
||||
)
|
||||
|
||||
summarizer = LLMContextSummarizer(context=self.context, config=config)
|
||||
await summarizer.setup(self.task_manager)
|
||||
|
||||
request_frame = None
|
||||
|
||||
@summarizer.event_handler("on_request_summarization")
|
||||
async def on_request_summarization(summarizer, frame):
|
||||
nonlocal request_frame
|
||||
request_frame = frame
|
||||
|
||||
# Add many messages that exceed the token limit
|
||||
for i in range(10):
|
||||
self.context.add_message(
|
||||
{"role": "user", "content": "This is a test message with enough tokens."}
|
||||
)
|
||||
|
||||
await summarizer.process_frame(LLMFullResponseStartFrame())
|
||||
self.assertIsNotNone(request_frame)
|
||||
|
||||
await summarizer.cleanup()
|
||||
|
||||
async def test_message_limit_none_no_trigger_below_tokens(self):
|
||||
"""Test that many messages don't trigger when message limit is None and tokens are low."""
|
||||
config = LLMAutoContextSummarizationConfig(
|
||||
max_context_tokens=100000, # Very high
|
||||
max_unsummarized_messages=None,
|
||||
)
|
||||
|
||||
summarizer = LLMContextSummarizer(context=self.context, config=config)
|
||||
await summarizer.setup(self.task_manager)
|
||||
|
||||
request_frame = None
|
||||
|
||||
@summarizer.event_handler("on_request_summarization")
|
||||
async def on_request_summarization(summarizer, frame):
|
||||
nonlocal request_frame
|
||||
request_frame = frame
|
||||
|
||||
# Add many short messages — would exceed any reasonable message count
|
||||
# but tokens stay well below the limit
|
||||
for i in range(50):
|
||||
self.context.add_message({"role": "user", "content": f"Msg {i}"})
|
||||
|
||||
await summarizer.process_frame(LLMFullResponseStartFrame())
|
||||
|
||||
# Should NOT trigger because token limit is not exceeded
|
||||
self.assertIsNone(request_frame)
|
||||
|
||||
await summarizer.cleanup()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user