From 1c8d31de70aaec99395659fc19447f9719f8e521 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 25 Mar 2026 15:37:20 -0400 Subject: [PATCH] Add trace logging for previous_response_id decisions and fix example Add detailed trace-level logging to _apply_previous_response_optimization showing why the optimization was applied or fell back to full context, including the relevant data for debugging. Use append_to_context=False for the filler TTSSpeakFrame in the function-calling example to avoid altering the conversation history and breaking the previous_response_id prefix match. --- .../14-function-calling-openai-responses.py | 6 +- src/pipecat/services/openai/responses/llm.py | 67 +++++++++++++------ 2 files changed, 51 insertions(+), 22 deletions(-) diff --git a/examples/foundational/14-function-calling-openai-responses.py b/examples/foundational/14-function-calling-openai-responses.py index 58cac774a..afdb92aa8 100644 --- a/examples/foundational/14-function-calling-openai-responses.py +++ b/examples/foundational/14-function-calling-openai-responses.py @@ -86,7 +86,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): @llm.event_handler("on_function_calls_started") async def on_function_calls_started(service, function_calls): - await tts.queue_frame(TTSSpeakFrame("Let me check on that.")) + # Avoid appending this filler message to the LLM context — it would + # alter the conversation history and prevent + # OpenAIResponsesLLMService's previous_response_id optimization from + # matching, forcing a full context resend. + await tts.queue_frame(TTSSpeakFrame("Let me check on that.", append_to_context=False)) weather_function = FunctionSchema( name="get_current_weather", diff --git a/src/pipecat/services/openai/responses/llm.py b/src/pipecat/services/openai/responses/llm.py index 3745e40b9..3aaa63d13 100644 --- a/src/pipecat/services/openai/responses/llm.py +++ b/src/pipecat/services/openai/responses/llm.py @@ -491,31 +491,56 @@ class OpenAIResponsesLLMService(_BaseOpenAIResponsesLLMService): Returns: The (possibly modified) params dict. """ + if self._previous_response_id is None: + logger.trace( + f"{self}: Sending full context ({len(full_input)} items) — no previous response" + ) + return params + if ( - self._previous_response_id is not None - and self._previous_input_length is not None - and self._previous_input_hash is not None - and len(full_input) > self._previous_input_length + self._previous_input_length is None + or self._previous_input_hash is None + or len(full_input) <= self._previous_input_length ): - prefix = full_input[: self._previous_input_length] - prefix_hash = self._hash_input_items(prefix) - if prefix_hash == self._previous_input_hash: - items_after_prefix = full_input[self._previous_input_length :] - response_output = self._previous_response_output or [] + logger.trace( + f"{self}: Sending full context ({len(full_input)} items) — " + f"input not longer than previous ({self._previous_input_length})" + ) + return params - if self._starts_with_response_output(items_after_prefix, response_output): - # The server already knows its own output — skip those items - items_to_send = items_after_prefix[len(response_output) :] - cached = self._previous_input_length + len(response_output) - params["input"] = items_to_send - params["previous_response_id"] = self._previous_response_id - logger.debug( - f"{self}: Sending incremental context via previous_response_id " - f"({len(items_to_send)} new items, {cached} cached)" - ) - return params + prefix = full_input[: self._previous_input_length] + prefix_hash = self._hash_input_items(prefix) + if prefix_hash != self._previous_input_hash: + logger.trace( + f"{self}: Sending full context ({len(full_input)} items) — " + f"input prefix hash mismatch " + f"(previous input: {json.dumps(prefix, indent=2, default=str)}, " + f"expected hash: {self._previous_input_hash}, " + f"actual hash: {prefix_hash})" + ) + return params - logger.debug(f"{self}: Sending full context ({len(full_input)} items)") + items_after_prefix = full_input[self._previous_input_length :] + response_output = self._previous_response_output or [] + + if not self._starts_with_response_output(items_after_prefix, response_output): + logger.trace( + f"{self}: Sending full context ({len(full_input)} items) — " + f"response output mismatch after prefix " + f"(previous response output: {json.dumps(response_output, indent=2, default=str)}, " + f"items after prefix: {json.dumps(items_after_prefix, indent=2, default=str)})" + ) + return params + + # The server already knows its own output — skip those items + items_to_send = items_after_prefix[len(response_output) :] + cached = self._previous_input_length + len(response_output) + params["input"] = items_to_send + params["previous_response_id"] = self._previous_response_id + logger.trace( + f"{self}: Sending incremental context via previous_response_id " + f"({len(items_to_send)} new items, {cached} cached)" + ) return params @staticmethod