From 989fb4deaa085144ea4e74938a0b1415c3c46f68 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Tue, 14 Apr 2026 11:48:50 -0400 Subject: [PATCH] Fix context summarization failing with mid-conversation system messages Only treat messages[0] as the initial system prompt when determining the summarization range. Previously, the code scanned the entire context for the first system-role message, which caused failures when the only system message was a mid-conversation injection (e.g. "The user has been quiet"). In that case summary_start exceeded summary_end, producing an empty range and "No messages to summarize" errors. Fixes #4286 --- .../aggregators/llm_context_summarizer.py | 23 ++++---- .../context/llm_context_summarization.py | 28 ++++------ tests/test_context_summarization.py | 52 +++++++++++++++++++ 3 files changed, 74 insertions(+), 29 deletions(-) diff --git a/src/pipecat/processors/aggregators/llm_context_summarizer.py b/src/pipecat/processors/aggregators/llm_context_summarizer.py index 4e55ffaf1..40be383fa 100644 --- a/src/pipecat/processors/aggregators/llm_context_summarizer.py +++ b/src/pipecat/processors/aggregators/llm_context_summarizer.py @@ -429,18 +429,17 @@ class LLMContextSummarizer(BaseObject): config = self._auto_config.summary_config messages = self._context.messages - # Find the first system message to preserve. LLMSpecificMessage instances are excluded - # because they are not dict-like and never represent a system message; they hold - # service-specific metadata (e.g. thinking blocks) that is always paired with a - # standard message. - first_system_msg = next( - ( - msg - for msg in messages - if not isinstance(msg, LLMSpecificMessage) and msg.get("role") == "system" - ), - None, - ) + # Preserve the first message if it is a system message (initial system prompt). + # Only messages[0] is treated as the system preamble — system messages at + # other positions are mid-conversation injections and are not preserved + # separately (they will be part of the summary or the recent messages). + first_system_msg = None + if ( + messages + and not isinstance(messages[0], LLMSpecificMessage) + and messages[0].get("role") == "system" + ): + first_system_msg = messages[0] # Get recent messages to keep recent_messages = messages[last_summarized_index + 1 :] diff --git a/src/pipecat/utils/context/llm_context_summarization.py b/src/pipecat/utils/context/llm_context_summarization.py index 3a5128bc5..cfdf8f82f 100644 --- a/src/pipecat/utils/context/llm_context_summarization.py +++ b/src/pipecat/utils/context/llm_context_summarization.py @@ -522,25 +522,19 @@ class LLMContextSummarizationUtil: if len(messages) <= min_messages_to_keep: return LLMMessagesToSummarize(messages=[], last_summarized_index=-1) - # Find first system message index. LLMSpecificMessage instances are excluded because - # they are not dict-like and never represent a system message; they hold - # service-specific metadata (e.g. thinking blocks) that is always paired with a - # standard message. - first_system_index = next( - ( - i - for i, msg in enumerate(messages) - if not isinstance(msg, LLMSpecificMessage) and msg.get("role") == "system" - ), - -1, + # Check if the first message is a system message (initial system prompt). + # Only messages[0] is treated as the system message to preserve — system + # messages at other positions are mid-conversation injections and should be + # included in the summarization range. + first_msg = messages[0] if messages else None + first_is_system = ( + first_msg is not None + and not isinstance(first_msg, LLMSpecificMessage) + and first_msg.get("role") == "system" ) - # Messages to summarize are between first system and recent messages - # We exclude the first system message itself - if first_system_index >= 0: - summary_start = first_system_index + 1 - else: - summary_start = 0 + # Start summarization after the initial system message if present + summary_start = 1 if first_is_system else 0 # Get messages to keep (last N messages) summary_end = len(messages) - min_messages_to_keep diff --git a/tests/test_context_summarization.py b/tests/test_context_summarization.py index 26d26614e..c37eb29bc 100644 --- a/tests/test_context_summarization.py +++ b/tests/test_context_summarization.py @@ -136,6 +136,58 @@ class TestContextSummarizationMixin(unittest.TestCase): self.assertEqual(len(result.messages), 0) self.assertEqual(result.last_summarized_index, -1) + def test_get_messages_to_summarize_mid_conversation_system_message(self): + """Test that a system message mid-conversation is included in summarization. + + Regression test for #4286: when the only system message is a mid-conversation + injection (not at index 0), get_messages_to_summarize should start from index 0, + not after the mid-conversation system message. + """ + context = LLMContext() + + # No system message at index 0 (using system_instruction instead) + context.add_message({"role": "user", "content": "Hello"}) + context.add_message({"role": "assistant", "content": "Hi there"}) + context.add_message({"role": "user", "content": "Tell me a joke"}) + context.add_message({"role": "assistant", "content": "Why did the chicken..."}) + # Mid-conversation system injection (e.g. "The user has been quiet") + context.add_message({"role": "system", "content": "The user has been quiet for a while"}) + context.add_message({"role": "user", "content": "Latest message"}) + context.add_message({"role": "assistant", "content": "Latest response"}) + + # Keep last 2 messages + result = LLMContextSummarizationUtil.get_messages_to_summarize(context, 2) + + # Should summarize from index 0 (no system preamble to skip) + # through index 4, keeping last 2 messages + self.assertEqual(len(result.messages), 5) + self.assertEqual(result.messages[0]["content"], "Hello") + # The mid-conversation system message should be included in summarization + self.assertEqual(result.messages[4]["content"], "The user has been quiet for a while") + self.assertEqual(result.last_summarized_index, 4) + + def test_get_messages_to_summarize_system_at_index_0_with_mid_system(self): + """Test that only messages[0] system message is preserved, not later ones.""" + context = LLMContext() + + context.add_message({"role": "system", "content": "You are helpful"}) + context.add_message({"role": "user", "content": "Hello"}) + context.add_message({"role": "assistant", "content": "Hi"}) + # Mid-conversation system injection + context.add_message({"role": "system", "content": "The user seems frustrated"}) + context.add_message({"role": "user", "content": "Help me"}) + context.add_message({"role": "assistant", "content": "Sure"}) + + # Keep last 2 messages + result = LLMContextSummarizationUtil.get_messages_to_summarize(context, 2) + + # Should skip index 0 (system preamble), summarize indices 1-3 + self.assertEqual(len(result.messages), 3) + self.assertEqual(result.messages[0]["content"], "Hello") + # Mid-conversation system message is summarized, not treated as preamble + self.assertEqual(result.messages[2]["content"], "The user seems frustrated") + self.assertEqual(result.last_summarized_index, 3) + def test_format_messages_for_summary(self): """Test message formatting for summary."""