Fix context summarization failing with mid-conversation system messages
Only treat `messages[0]` as the initial system prompt when determining the summarization range. Previously, the code scanned the entire context for the first system-role message, which caused failures when the only system message was a mid-conversation injection (e.g. "The user has been quiet"). In that case `summary_start` could reach or exceed `summary_end`, producing an empty range and "No messages to summarize" errors. Fixes #4286.
This commit is contained in:
@@ -429,18 +429,17 @@ class LLMContextSummarizer(BaseObject):
|
||||
config = self._auto_config.summary_config
|
||||
messages = self._context.messages
|
||||
|
||||
# Find the first system message to preserve. LLMSpecificMessage instances are excluded
|
||||
# because they are not dict-like and never represent a system message; they hold
|
||||
# service-specific metadata (e.g. thinking blocks) that is always paired with a
|
||||
# standard message.
|
||||
first_system_msg = next(
|
||||
(
|
||||
msg
|
||||
for msg in messages
|
||||
if not isinstance(msg, LLMSpecificMessage) and msg.get("role") == "system"
|
||||
),
|
||||
None,
|
||||
)
|
||||
# Preserve the first message if it is a system message (initial system prompt).
|
||||
# Only messages[0] is treated as the system preamble — system messages at
|
||||
# other positions are mid-conversation injections and are not preserved
|
||||
# separately (they will be part of the summary or the recent messages).
|
||||
first_system_msg = None
|
||||
if (
|
||||
messages
|
||||
and not isinstance(messages[0], LLMSpecificMessage)
|
||||
and messages[0].get("role") == "system"
|
||||
):
|
||||
first_system_msg = messages[0]
|
||||
|
||||
# Get recent messages to keep
|
||||
recent_messages = messages[last_summarized_index + 1 :]
|
||||
|
||||
@@ -522,25 +522,19 @@ class LLMContextSummarizationUtil:
|
||||
if len(messages) <= min_messages_to_keep:
|
||||
return LLMMessagesToSummarize(messages=[], last_summarized_index=-1)
|
||||
|
||||
# Find first system message index. LLMSpecificMessage instances are excluded because
|
||||
# they are not dict-like and never represent a system message; they hold
|
||||
# service-specific metadata (e.g. thinking blocks) that is always paired with a
|
||||
# standard message.
|
||||
first_system_index = next(
|
||||
(
|
||||
i
|
||||
for i, msg in enumerate(messages)
|
||||
if not isinstance(msg, LLMSpecificMessage) and msg.get("role") == "system"
|
||||
),
|
||||
-1,
|
||||
# Check if the first message is a system message (initial system prompt).
|
||||
# Only messages[0] is treated as the system message to preserve — system
|
||||
# messages at other positions are mid-conversation injections and should be
|
||||
# included in the summarization range.
|
||||
first_msg = messages[0] if messages else None
|
||||
first_is_system = (
|
||||
first_msg is not None
|
||||
and not isinstance(first_msg, LLMSpecificMessage)
|
||||
and first_msg.get("role") == "system"
|
||||
)
|
||||
|
||||
# Messages to summarize are between first system and recent messages
|
||||
# We exclude the first system message itself
|
||||
if first_system_index >= 0:
|
||||
summary_start = first_system_index + 1
|
||||
else:
|
||||
summary_start = 0
|
||||
# Start summarization after the initial system message if present
|
||||
summary_start = 1 if first_is_system else 0
|
||||
|
||||
# Get messages to keep (last N messages)
|
||||
summary_end = len(messages) - min_messages_to_keep
|
||||
|
||||
@@ -136,6 +136,58 @@ class TestContextSummarizationMixin(unittest.TestCase):
|
||||
self.assertEqual(len(result.messages), 0)
|
||||
self.assertEqual(result.last_summarized_index, -1)
|
||||
|
||||
def test_get_messages_to_summarize_mid_conversation_system_message(self):
    """Verify a lone mid-conversation system message is part of the summary range.

    Regression test for #4286: when the only system message in the context is a
    mid-conversation injection (i.e. it is not at index 0),
    get_messages_to_summarize must begin at index 0 rather than just after
    that injected system message.
    """
    conversation = [
        # No system message at index 0 (using system_instruction instead)
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "Hi there"},
        {"role": "user", "content": "Tell me a joke"},
        {"role": "assistant", "content": "Why did the chicken..."},
        # Mid-conversation system injection (e.g. "The user has been quiet")
        {"role": "system", "content": "The user has been quiet for a while"},
        {"role": "user", "content": "Latest message"},
        {"role": "assistant", "content": "Latest response"},
    ]

    ctx = LLMContext()
    for entry in conversation:
        ctx.add_message(entry)

    # Only the final two messages are kept out of the summary.
    outcome = LLMContextSummarizationUtil.get_messages_to_summarize(ctx, 2)
    selected = outcome.messages

    # Nothing is skipped at the front, so indices 0 through 4 are selected.
    self.assertEqual(len(selected), 5)
    self.assertEqual(selected[0]["content"], "Hello")
    # The injected system message is summarized along with everything else.
    self.assertEqual(selected[4]["content"], "The user has been quiet for a while")
    self.assertEqual(outcome.last_summarized_index, 4)
|
||||
|
||||
def test_get_messages_to_summarize_system_at_index_0_with_mid_system(self):
    """Verify that only a system message at index 0 is held back from the summary.

    A second system message appearing later in the conversation must be
    returned among the messages to summarize.
    """
    conversation = [
        {"role": "system", "content": "You are helpful"},
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "Hi"},
        # Mid-conversation system injection
        {"role": "system", "content": "The user seems frustrated"},
        {"role": "user", "content": "Help me"},
        {"role": "assistant", "content": "Sure"},
    ]

    ctx = LLMContext()
    for entry in conversation:
        ctx.add_message(entry)

    # Only the final two messages are kept out of the summary.
    outcome = LLMContextSummarizationUtil.get_messages_to_summarize(ctx, 2)
    selected = outcome.messages

    # Index 0 (the initial system prompt) is skipped; indices 1-3 are selected.
    self.assertEqual(len(selected), 3)
    self.assertEqual(selected[0]["content"], "Hello")
    # The later system message is summarized rather than treated as the prompt.
    self.assertEqual(selected[2]["content"], "The user seems frustrated")
    self.assertEqual(outcome.last_summarized_index, 3)
|
||||
|
||||
def test_format_messages_for_summary(self):
|
||||
"""Test message formatting for summary."""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user