Add trace logging for previous_response_id decisions and fix example

Add detailed trace-level logging to _apply_previous_response_optimization
showing whether the optimization was applied or, if not, why the service
fell back to sending the full context, including the relevant data for
debugging.

Use append_to_context=False for the filler TTSSpeakFrame in the
function-calling example to avoid altering the conversation history
and breaking the previous_response_id prefix match.
This commit is contained in:
Paul Kompfner
2026-03-25 15:37:20 -04:00
parent 9defff2a34
commit 1c8d31de70
2 changed files with 51 additions and 22 deletions

View File

@@ -86,7 +86,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
@llm.event_handler("on_function_calls_started")
async def on_function_calls_started(service, function_calls):
await tts.queue_frame(TTSSpeakFrame("Let me check on that."))
# Avoid appending this filler message to the LLM context — it would
# alter the conversation history and prevent
# OpenAIResponsesLLMService's previous_response_id optimization from
# matching, forcing a full context resend.
await tts.queue_frame(TTSSpeakFrame("Let me check on that.", append_to_context=False))
weather_function = FunctionSchema(
name="get_current_weather",

View File

@@ -491,31 +491,56 @@ class OpenAIResponsesLLMService(_BaseOpenAIResponsesLLMService):
Returns:
The (possibly modified) params dict.
"""
if self._previous_response_id is None:
logger.trace(
f"{self}: Sending full context ({len(full_input)} items) — no previous response"
)
return params
if (
self._previous_response_id is not None
and self._previous_input_length is not None
and self._previous_input_hash is not None
and len(full_input) > self._previous_input_length
self._previous_input_length is None
or self._previous_input_hash is None
or len(full_input) <= self._previous_input_length
):
prefix = full_input[: self._previous_input_length]
prefix_hash = self._hash_input_items(prefix)
if prefix_hash == self._previous_input_hash:
items_after_prefix = full_input[self._previous_input_length :]
response_output = self._previous_response_output or []
logger.trace(
f"{self}: Sending full context ({len(full_input)} items) — "
f"input not longer than previous ({self._previous_input_length})"
)
return params
if self._starts_with_response_output(items_after_prefix, response_output):
# The server already knows its own output — skip those items
items_to_send = items_after_prefix[len(response_output) :]
cached = self._previous_input_length + len(response_output)
params["input"] = items_to_send
params["previous_response_id"] = self._previous_response_id
logger.debug(
f"{self}: Sending incremental context via previous_response_id "
f"({len(items_to_send)} new items, {cached} cached)"
)
return params
prefix = full_input[: self._previous_input_length]
prefix_hash = self._hash_input_items(prefix)
if prefix_hash != self._previous_input_hash:
logger.trace(
f"{self}: Sending full context ({len(full_input)} items) — "
f"input prefix hash mismatch "
f"(previous input: {json.dumps(prefix, indent=2, default=str)}, "
f"expected hash: {self._previous_input_hash}, "
f"actual hash: {prefix_hash})"
)
return params
logger.debug(f"{self}: Sending full context ({len(full_input)} items)")
items_after_prefix = full_input[self._previous_input_length :]
response_output = self._previous_response_output or []
if not self._starts_with_response_output(items_after_prefix, response_output):
logger.trace(
f"{self}: Sending full context ({len(full_input)} items) — "
f"response output mismatch after prefix "
f"(previous response output: {json.dumps(response_output, indent=2, default=str)}, "
f"items after prefix: {json.dumps(items_after_prefix, indent=2, default=str)})"
)
return params
# The server already knows its own output — skip those items
items_to_send = items_after_prefix[len(response_output) :]
cached = self._previous_input_length + len(response_output)
params["input"] = items_to_send
params["previous_response_id"] = self._previous_response_id
logger.trace(
f"{self}: Sending incremental context via previous_response_id "
f"({len(items_to_send)} new items, {cached} cached)"
)
return params
@staticmethod