Fix context summarization failing with mid-conversation system messages

Only treat messages[0] as the initial system prompt when determining the
summarization range. Previously, the code scanned the entire context for
the first system-role message, which caused failures when the only system
message was a mid-conversation injection (e.g. "The user has been quiet").
In that case summary_start could reach or exceed summary_end, producing an
empty range and "No messages to summarize" errors.

Fixes #4286
This commit is contained in:
Mark Backman
2026-04-14 11:48:50 -04:00
parent ab74605a26
commit 989fb4deaa
3 changed files with 74 additions and 29 deletions

View File

@@ -429,18 +429,17 @@ class LLMContextSummarizer(BaseObject):
config = self._auto_config.summary_config
messages = self._context.messages
# Find the first system message to preserve. LLMSpecificMessage instances are excluded
# because they are not dict-like and never represent a system message; they hold
# service-specific metadata (e.g. thinking blocks) that is always paired with a
# standard message.
first_system_msg = next(
(
msg
for msg in messages
if not isinstance(msg, LLMSpecificMessage) and msg.get("role") == "system"
),
None,
)
# Preserve the first message if it is a system message (initial system prompt).
# Only messages[0] is treated as the system preamble — system messages at
# other positions are mid-conversation injections and are not preserved
# separately (they will be part of the summary or the recent messages).
first_system_msg = None
if (
messages
and not isinstance(messages[0], LLMSpecificMessage)
and messages[0].get("role") == "system"
):
first_system_msg = messages[0]
# Get recent messages to keep
recent_messages = messages[last_summarized_index + 1 :]

View File

@@ -522,25 +522,19 @@ class LLMContextSummarizationUtil:
if len(messages) <= min_messages_to_keep:
return LLMMessagesToSummarize(messages=[], last_summarized_index=-1)
# Find first system message index. LLMSpecificMessage instances are excluded because
# they are not dict-like and never represent a system message; they hold
# service-specific metadata (e.g. thinking blocks) that is always paired with a
# standard message.
first_system_index = next(
(
i
for i, msg in enumerate(messages)
if not isinstance(msg, LLMSpecificMessage) and msg.get("role") == "system"
),
-1,
# Check if the first message is a system message (initial system prompt).
# Only messages[0] is treated as the system message to preserve — system
# messages at other positions are mid-conversation injections and should be
# included in the summarization range.
first_msg = messages[0] if messages else None
first_is_system = (
first_msg is not None
and not isinstance(first_msg, LLMSpecificMessage)
and first_msg.get("role") == "system"
)
# Messages to summarize are between first system and recent messages
# We exclude the first system message itself
if first_system_index >= 0:
summary_start = first_system_index + 1
else:
summary_start = 0
# Start summarization after the initial system message if present
summary_start = 1 if first_is_system else 0
# Get messages to keep (last N messages)
summary_end = len(messages) - min_messages_to_keep

View File

@@ -136,6 +136,58 @@ class TestContextSummarizationMixin(unittest.TestCase):
self.assertEqual(len(result.messages), 0)
self.assertEqual(result.last_summarized_index, -1)
def test_get_messages_to_summarize_mid_conversation_system_message(self):
    """Verify a mid-conversation system message lands inside the summarized range.

    Regression test for #4286. The context built here has no system message at
    index 0; its only system-role message is an injection at index 4. The
    range returned by get_messages_to_summarize must therefore begin at index 0
    instead of starting after that injected message.
    """
    # Index 0 is a user message (no system preamble; system_instruction is
    # used instead), and index 4 is the mid-conversation system injection
    # (e.g. "The user has been quiet").
    conversation = [
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "Hi there"},
        {"role": "user", "content": "Tell me a joke"},
        {"role": "assistant", "content": "Why did the chicken..."},
        {"role": "system", "content": "The user has been quiet for a while"},
        {"role": "user", "content": "Latest message"},
        {"role": "assistant", "content": "Latest response"},
    ]
    context = LLMContext()
    for entry in conversation:
        context.add_message(entry)

    # Ask to keep the last 2 of the 7 messages.
    result = LLMContextSummarizationUtil.get_messages_to_summarize(context, 2)
    summarized = result.messages

    # Nothing is skipped at the front, so indices 0 through 4 are summarized.
    self.assertEqual(len(summarized), 5)
    self.assertEqual(summarized[0]["content"], "Hello")
    # The injected system message is the last entry of the summarized range.
    self.assertEqual(summarized[4]["content"], "The user has been quiet for a while")
    self.assertEqual(result.last_summarized_index, 4)
def test_get_messages_to_summarize_system_at_index_0_with_mid_system(self):
    """Verify only the system message at index 0 is held back from summarization.

    The context starts with a system prompt and also contains a second
    system-role message injected at index 3. The later one must be part of
    the summarized range rather than being treated as the preamble.
    """
    # Index 0 is the initial system prompt; index 3 is a mid-conversation
    # system injection.
    conversation = [
        {"role": "system", "content": "You are helpful"},
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "Hi"},
        {"role": "system", "content": "The user seems frustrated"},
        {"role": "user", "content": "Help me"},
        {"role": "assistant", "content": "Sure"},
    ]
    context = LLMContext()
    for entry in conversation:
        context.add_message(entry)

    # Ask to keep the last 2 of the 6 messages.
    result = LLMContextSummarizationUtil.get_messages_to_summarize(context, 2)
    summarized = result.messages

    # Index 0 is skipped as the preamble, leaving indices 1 through 3.
    self.assertEqual(len(summarized), 3)
    self.assertEqual(summarized[0]["content"], "Hello")
    # The injected system message is summarized like any other message.
    self.assertEqual(summarized[2]["content"], "The user seems frustrated")
    self.assertEqual(result.last_summarized_index, 3)
def test_format_messages_for_summary(self):
"""Test message formatting for summary."""