Add trace logging for previous_response_id decisions and fix example

Add detailed trace-level logging to _apply_previous_response_optimization showing why the optimization was applied or why it fell back to sending the full context, including the relevant data for debugging; the existing debug-level messages in that method are moved to trace level as well. In the function-calling example, use append_to_context=False for the filler TTSSpeakFrame so that it does not alter the conversation history and break the previous_response_id prefix match.
2026-03-25 15:37:20 -04:00
parent 9defff2a34
commit 1c8d31de70
2 changed files with 51 additions and 22 deletions
--- a/examples/foundational/14-function-calling-openai-responses.py
+++ b/examples/foundational/14-function-calling-openai-responses.py
@@ -86,7 +86,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):

    @llm.event_handler("on_function_calls_started")
    async def on_function_calls_started(service, function_calls):
-        await tts.queue_frame(TTSSpeakFrame("Let me check on that."))
+        # Avoid appending this filler message to the LLM context — it would
+        # alter the conversation history and prevent
+        # OpenAIResponsesLLMService's previous_response_id optimization from
+        # matching, forcing a full context resend.
+        await tts.queue_frame(TTSSpeakFrame("Let me check on that.", append_to_context=False))

    weather_function = FunctionSchema(
        name="get_current_weather",
--- a/src/pipecat/services/openai/responses/llm.py
+++ b/src/pipecat/services/openai/responses/llm.py
@@ -491,31 +491,56 @@ class OpenAIResponsesLLMService(_BaseOpenAIResponsesLLMService):
        Returns:
            The (possibly modified) params dict.
        """
+        if self._previous_response_id is None:
+            logger.trace(
+                f"{self}: Sending full context ({len(full_input)} items) — no previous response"
+            )
+            return params
+
        if (
-            self._previous_response_id is not None
-            and self._previous_input_length is not None
-            and self._previous_input_hash is not None
-            and len(full_input) > self._previous_input_length
+            self._previous_input_length is None
+            or self._previous_input_hash is None
+            or len(full_input) <= self._previous_input_length
        ):
-            prefix = full_input[: self._previous_input_length]
-            prefix_hash = self._hash_input_items(prefix)
-            if prefix_hash == self._previous_input_hash:
-                items_after_prefix = full_input[self._previous_input_length :]
-                response_output = self._previous_response_output or []
+            logger.trace(
+                f"{self}: Sending full context ({len(full_input)} items) — "
+                f"input not longer than previous ({self._previous_input_length})"
+            )
+            return params

-                if self._starts_with_response_output(items_after_prefix, response_output):
-                    # The server already knows its own output — skip those items
-                    items_to_send = items_after_prefix[len(response_output) :]
-                    cached = self._previous_input_length + len(response_output)
-                    params["input"] = items_to_send
-                    params["previous_response_id"] = self._previous_response_id
-                    logger.debug(
-                        f"{self}: Sending incremental context via previous_response_id "
-                        f"({len(items_to_send)} new items, {cached} cached)"
-                    )
-                    return params
+        prefix = full_input[: self._previous_input_length]
+        prefix_hash = self._hash_input_items(prefix)
+        if prefix_hash != self._previous_input_hash:
+            logger.trace(
+                f"{self}: Sending full context ({len(full_input)} items) — "
+                f"input prefix hash mismatch "
+                f"(previous input: {json.dumps(prefix, indent=2, default=str)}, "
+                f"expected hash: {self._previous_input_hash}, "
+                f"actual hash: {prefix_hash})"
+            )
+            return params

-        logger.debug(f"{self}: Sending full context ({len(full_input)} items)")
+        items_after_prefix = full_input[self._previous_input_length :]
+        response_output = self._previous_response_output or []
+
+        if not self._starts_with_response_output(items_after_prefix, response_output):
+            logger.trace(
+                f"{self}: Sending full context ({len(full_input)} items) — "
+                f"response output mismatch after prefix "
+                f"(previous response output: {json.dumps(response_output, indent=2, default=str)}, "
+                f"items after prefix: {json.dumps(items_after_prefix, indent=2, default=str)})"
+            )
+            return params
+
+        # The server already knows its own output — skip those items
+        items_to_send = items_after_prefix[len(response_output) :]
+        cached = self._previous_input_length + len(response_output)
+        params["input"] = items_to_send
+        params["previous_response_id"] = self._previous_response_id
+        logger.trace(
+            f"{self}: Sending incremental context via previous_response_id "
+            f"({len(items_to_send)} new items, {cached} cached)"
+        )
        return params

    @staticmethod