Fix context summarization leaving orphaned tool responses in kept context.

2026-03-06 11:40:27 -03:00
parent 3199168d3e
commit 4ef3b52c72
2 changed files with 164 additions and 8 deletions
--- a/src/pipecat/utils/context/llm_context_summarization.py
+++ b/src/pipecat/utils/context/llm_context_summarization.py
@@ -382,25 +382,33 @@ class LLMContextSummarizationUtil:
        return total

    @staticmethod
-    def _get_function_calls_in_progress_index(messages: List[dict], start_idx: int) -> int:
+    def _get_function_calls_in_progress_index(
+        messages: List[dict], start_idx: int, summary_end: int
+    ) -> int:
        """Find the earliest message index with incomplete function calls.

-        Scans messages to identify function/tool calls that haven't received
-        their results yet. This prevents summarizing incomplete tool interactions
-        which would break the request-response pairing.
+        Scans messages from ``start_idx`` up to (but not including)
+        ``summary_end`` to identify tool calls whose responses either don't
+        exist yet or fall in the kept portion of the context (>= summary_end).
+        This prevents summarizing tool call requests when their responses would
+        remain in the kept context as orphans, which the OpenAI API rejects.

        Args:
            messages: List of messages to check.
            start_idx: Index to start checking from.
+            summary_end: Exclusive upper bound for the scan (the first kept
+                message index). Only tool responses within this range count as
+                completing a call; responses beyond it are treated as absent,
+                leaving the call "in progress".

        Returns:
            Index of first message with function call in progress, or -1 if all
-            function calls are complete.
+            function calls are complete within the scanned range.
        """
        # Track tool call IDs mapped to their message index
        pending_tool_calls: dict[str, int] = {}

-        for i in range(start_idx, len(messages)):
+        for i in range(start_idx, summary_end):
            msg = messages[i]
            # LLMSpecificMessage instances (e.g. thinking blocks) never carry tool_call or
            # tool_call_id fields, so they cannot affect the pending-call tracking. Skipping
@@ -484,7 +492,7 @@ class LLMContextSummarizationUtil:

        # Check for function calls in progress in the range we want to summarize
        function_call_start = LLMContextSummarizationUtil._get_function_calls_in_progress_index(
-            messages, summary_start
+            messages, summary_start, summary_end
        )
        if function_call_start >= 0 and function_call_start < summary_end:
            # Stop summarization before the function call
--- a/tests/test_context_summarization.py
+++ b/tests/test_context_summarization.py
@@ -954,6 +954,152 @@ class TestDedicatedLLMSummarization(unittest.IsolatedAsyncioTestCase):
        await summarizer.cleanup()


+class TestOrphanedToolResponseDetection(unittest.TestCase):
+    """Tests that tool responses in the kept range are treated as orphans.
+
+    The scan in _get_function_calls_in_progress_index is bounded by summary_end,
+    so a tool response that falls in the kept portion (>= summary_end) never
+    resolves its matching tool call.  This ensures the assistant+tool_calls
+    message and all its responses stay together in the kept range.
+    """
+
+    def test_tool_response_in_kept_range_is_treated_as_orphan(self):
+        """Tool response in the kept range causes the tool call to be kept too."""
+        context = LLMContext()
+        context.add_message({"role": "system", "content": "System prompt"})  # idx 0
+        context.add_message({"role": "user", "content": "Hello"})  # idx 1
+        context.add_message(  # idx 2: assistant with tool_call
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {
+                        "id": "call_1",
+                        "type": "function",
+                        "function": {"name": "fn", "arguments": "{}"},
+                    }
+                ],
+            }
+        )
+        context.add_message(
+            {"role": "tool", "tool_call_id": "call_1", "content": "result"}
+        )  # idx 3 (kept)
+        context.add_message({"role": "user", "content": "Thanks"})  # idx 4 (kept)
+
+        # Keep 2: summary_end=3. The tool response at idx 3 is outside the scan
+        # range → call_1 stays pending → boundary moves back to idx 2.
+        result = LLMContextSummarizationUtil.get_messages_to_summarize(context, 2)
+        self.assertEqual(result.last_summarized_index, 1)
+        self.assertEqual(result.messages[-1]["content"], "Hello")
+
+    def test_tool_response_in_summarized_range_is_not_orphan(self):
+        """Tool response within the summarized range correctly resolves its call."""
+        context = LLMContext()
+        context.add_message({"role": "system", "content": "System prompt"})  # idx 0
+        context.add_message({"role": "user", "content": "Hello"})  # idx 1
+        context.add_message(  # idx 2: assistant with tool_call
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {
+                        "id": "call_1",
+                        "type": "function",
+                        "function": {"name": "fn", "arguments": "{}"},
+                    }
+                ],
+            }
+        )
+        context.add_message(
+            {"role": "tool", "tool_call_id": "call_1", "content": "result"}
+        )  # idx 3
+        context.add_message({"role": "assistant", "content": "Done"})  # idx 4
+        context.add_message({"role": "user", "content": "Thanks"})  # idx 5 (kept)
+
+        # Keep 1: summary_end=5. Both the tool call (idx 2) and its response
+        # (idx 3) are within the scan range → resolved → no adjustment.
+        result = LLMContextSummarizationUtil.get_messages_to_summarize(context, 1)
+        self.assertEqual(result.last_summarized_index, 4)
+        self.assertEqual(len(result.messages), 4)
+
+    def test_partial_responses_in_kept_range_moves_back(self):
+        """When only some tool responses are in the kept range the whole group is kept."""
+        context = LLMContext()
+        context.add_message({"role": "system", "content": "System prompt"})  # idx 0
+        context.add_message({"role": "user", "content": "Hello"})  # idx 1
+        context.add_message(  # idx 2: assistant with two tool_calls
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {
+                        "id": "call_a",
+                        "type": "function",
+                        "function": {"name": "fn_a", "arguments": "{}"},
+                    },
+                    {
+                        "id": "call_b",
+                        "type": "function",
+                        "function": {"name": "fn_b", "arguments": "{}"},
+                    },
+                ],
+            }
+        )
+        context.add_message(
+            {"role": "tool", "tool_call_id": "call_a", "content": "result_a"}
+        )  # idx 3
+        context.add_message(
+            {"role": "tool", "tool_call_id": "call_b", "content": "result_b"}
+        )  # idx 4 (kept)
+        context.add_message({"role": "user", "content": "Thanks"})  # idx 5 (kept)
+
+        # Keep 2: summary_end=4. call_a is resolved (idx 3 is in scan range) but
+        # call_b's response (idx 4) is outside → call_b stays pending →
+        # function_call_start=2 → boundary moves back to idx 2.
+        result = LLMContextSummarizationUtil.get_messages_to_summarize(context, 2)
+        self.assertEqual(result.last_summarized_index, 1)
+        self.assertEqual(result.messages[-1]["content"], "Hello")
+
+    def test_non_adjacent_orphan_in_kept_range_moves_back(self):
+        """Orphaned tool response deeper in the kept range (not at the boundary) is detected."""
+        context = LLMContext()
+        context.add_message({"role": "system", "content": "System prompt"})  # idx 0
+        context.add_message({"role": "user", "content": "Hello"})  # idx 1
+        context.add_message(  # idx 2: assistant with two tool_calls
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {
+                        "id": "call_a",
+                        "type": "function",
+                        "function": {"name": "fn_a", "arguments": "{}"},
+                    },
+                    {
+                        "id": "call_b",
+                        "type": "function",
+                        "function": {"name": "fn_b", "arguments": "{}"},
+                    },
+                ],
+            }
+        )
+        context.add_message(
+            {"role": "tool", "tool_call_id": "call_a", "content": "result_a"}
+        )  # idx 3
+        context.add_message({"role": "user", "content": "Intermediate"})  # idx 4 (kept)
+        context.add_message(
+            {"role": "tool", "tool_call_id": "call_b", "content": "result_b"}
+        )  # idx 5 (kept) — NOT adjacent to the boundary
+        context.add_message({"role": "user", "content": "Latest"})  # idx 6 (kept)
+
+        # Keep 3: summary_end=4. call_b's response is at idx 5, the second
+        # message in the kept range. The scan ends before idx 4, so call_b is
+        # never resolved → function_call_start=2 → boundary moves back to idx 2.
+        result = LLMContextSummarizationUtil.get_messages_to_summarize(context, 3)
+        self.assertEqual(result.last_summarized_index, 1)
+        self.assertEqual(result.messages[-1]["content"], "Hello")
+
+
 class TestLLMSpecificMessageHandling(unittest.TestCase):
    """Tests that LLMSpecificMessage objects are correctly skipped in summarization."""

@@ -1022,7 +1168,9 @@ class TestLLMSpecificMessageHandling(unittest.TestCase):
            {"role": "tool", "tool_call_id": "call_123", "content": '{"time": "10:30 AM"}'},
        ]

-        result = LLMContextSummarizationUtil._get_function_calls_in_progress_index(messages, 0)
+        result = LLMContextSummarizationUtil._get_function_calls_in_progress_index(
+            messages, 0, len(messages)
+        )
        self.assertEqual(result, -1)