From 1c8d31de70aaec99395659fc19447f9719f8e521 Mon Sep 17 00:00:00 2001
From: Paul Kompfner <paul@daily.co>
Date: Wed, 25 Mar 2026 15:37:20 -0400
Subject: [PATCH] Add trace logging for previous_response_id decisions and fix
 example

Add detailed trace-level logging to _apply_previous_response_optimization
showing why the optimization was applied or fell back to full context,
including the relevant data for debugging.

Use append_to_context=False for the filler TTSSpeakFrame in the
function-calling example to avoid altering the conversation history
and breaking the previous_response_id prefix match.
---
 .../14-function-calling-openai-responses.py   |  6 +-
 src/pipecat/services/openai/responses/llm.py  | 67 +++++++++++++------
 2 files changed, 51 insertions(+), 22 deletions(-)

diff --git a/examples/foundational/14-function-calling-openai-responses.py b/examples/foundational/14-function-calling-openai-responses.py
index 58cac774a..afdb92aa8 100644
--- a/examples/foundational/14-function-calling-openai-responses.py
+++ b/examples/foundational/14-function-calling-openai-responses.py
@@ -86,7 +86,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
 
     @llm.event_handler("on_function_calls_started")
     async def on_function_calls_started(service, function_calls):
-        await tts.queue_frame(TTSSpeakFrame("Let me check on that."))
+        # Avoid appending this filler message to the LLM context — it would
+        # alter the conversation history and prevent
+        # OpenAIResponsesLLMService's previous_response_id optimization from
+        # matching, forcing a full context resend.
+        await tts.queue_frame(TTSSpeakFrame("Let me check on that.", append_to_context=False))
 
     weather_function = FunctionSchema(
         name="get_current_weather",
diff --git a/src/pipecat/services/openai/responses/llm.py b/src/pipecat/services/openai/responses/llm.py
index 3745e40b9..3aaa63d13 100644
--- a/src/pipecat/services/openai/responses/llm.py
+++ b/src/pipecat/services/openai/responses/llm.py
@@ -491,31 +491,56 @@ class OpenAIResponsesLLMService(_BaseOpenAIResponsesLLMService):
         Returns:
             The (possibly modified) params dict.
         """
+        if self._previous_response_id is None:
+            logger.trace(
+                f"{self}: Sending full context ({len(full_input)} items) — no previous response"
+            )
+            return params
+
         if (
-            self._previous_response_id is not None
-            and self._previous_input_length is not None
-            and self._previous_input_hash is not None
-            and len(full_input) > self._previous_input_length
+            self._previous_input_length is None
+            or self._previous_input_hash is None
+            or len(full_input) <= self._previous_input_length
         ):
-            prefix = full_input[: self._previous_input_length]
-            prefix_hash = self._hash_input_items(prefix)
-            if prefix_hash == self._previous_input_hash:
-                items_after_prefix = full_input[self._previous_input_length :]
-                response_output = self._previous_response_output or []
+            logger.trace(
+                f"{self}: Sending full context ({len(full_input)} items) — "
+                f"input not longer than previous ({self._previous_input_length})"
+            )
+            return params
 
-                if self._starts_with_response_output(items_after_prefix, response_output):
-                    # The server already knows its own output — skip those items
-                    items_to_send = items_after_prefix[len(response_output) :]
-                    cached = self._previous_input_length + len(response_output)
-                    params["input"] = items_to_send
-                    params["previous_response_id"] = self._previous_response_id
-                    logger.debug(
-                        f"{self}: Sending incremental context via previous_response_id "
-                        f"({len(items_to_send)} new items, {cached} cached)"
-                    )
-                    return params
+        prefix = full_input[: self._previous_input_length]
+        prefix_hash = self._hash_input_items(prefix)
+        if prefix_hash != self._previous_input_hash:
+            logger.trace(
+                f"{self}: Sending full context ({len(full_input)} items) — "
+                f"input prefix hash mismatch "
+                f"(previous input: {json.dumps(prefix, indent=2, default=str)}, "
+                f"expected hash: {self._previous_input_hash}, "
+                f"actual hash: {prefix_hash})"
+            )
+            return params
 
-        logger.debug(f"{self}: Sending full context ({len(full_input)} items)")
+        items_after_prefix = full_input[self._previous_input_length :]
+        response_output = self._previous_response_output or []
+
+        if not self._starts_with_response_output(items_after_prefix, response_output):
+            logger.trace(
+                f"{self}: Sending full context ({len(full_input)} items) — "
+                f"response output mismatch after prefix "
+                f"(previous response output: {json.dumps(response_output, indent=2, default=str)}, "
+                f"items after prefix: {json.dumps(items_after_prefix, indent=2, default=str)})"
+            )
+            return params
+
+        # The server already knows its own output — skip those items
+        items_to_send = items_after_prefix[len(response_output) :]
+        cached = self._previous_input_length + len(response_output)
+        params["input"] = items_to_send
+        params["previous_response_id"] = self._previous_response_id
+        logger.trace(
+            f"{self}: Sending incremental context via previous_response_id "
+            f"({len(items_to_send)} new items, {cached} cached)"
+        )
         return params
 
     @staticmethod