From 84538b0ca89bb6770bc97246dd2dfbe83d83b631 Mon Sep 17 00:00:00 2001 From: Ali Alhoshaiyan Date: Thu, 15 Jan 2026 09:52:44 +0300 Subject: [PATCH 1/3] Reduce Call Tool Result Context Size by Allowing UTF-8 in JSON Serialization --- src/pipecat/services/anthropic/llm.py | 2 +- src/pipecat/services/aws/llm.py | 2 +- src/pipecat/services/openai/llm.py | 2 +- src/pipecat/services/openai_realtime_beta/openai.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pipecat/services/anthropic/llm.py b/src/pipecat/services/anthropic/llm.py index fcaed1dfe..2f375df82 100644 --- a/src/pipecat/services/anthropic/llm.py +++ b/src/pipecat/services/anthropic/llm.py @@ -1246,7 +1246,7 @@ class AnthropicAssistantContextAggregator(LLMAssistantContextAggregator): frame: Frame containing function call result. """ if frame.result: - result = json.dumps(frame.result) + result = json.dumps(frame.result, ensure_ascii=False) await self._update_function_call_result(frame.function_name, frame.tool_call_id, result) else: await self._update_function_call_result( diff --git a/src/pipecat/services/aws/llm.py b/src/pipecat/services/aws/llm.py index 59859abf7..92049dffb 100644 --- a/src/pipecat/services/aws/llm.py +++ b/src/pipecat/services/aws/llm.py @@ -691,7 +691,7 @@ class AWSBedrockAssistantContextAggregator(LLMAssistantContextAggregator): frame: The function call result frame to handle. """ if frame.result: - result = json.dumps(frame.result) + result = json.dumps(frame.result, ensure_ascii=False) await self._update_function_call_result(frame.function_name, frame.tool_call_id, result) else: await self._update_function_call_result( diff --git a/src/pipecat/services/openai/llm.py b/src/pipecat/services/openai/llm.py index abb3e7eec..553733922 100644 --- a/src/pipecat/services/openai/llm.py +++ b/src/pipecat/services/openai/llm.py @@ -255,7 +255,7 @@ class OpenAIAssistantContextAggregator(LLMAssistantContextAggregator): frame: Frame containing the function call result. """ if frame.result: - result = json.dumps(frame.result) + result = json.dumps(frame.result, ensure_ascii=False) await self._update_function_call_result(frame.function_name, frame.tool_call_id, result) else: await self._update_function_call_result( diff --git a/src/pipecat/services/openai_realtime_beta/openai.py b/src/pipecat/services/openai_realtime_beta/openai.py index 0a6315986..0d20039b1 100644 --- a/src/pipecat/services/openai_realtime_beta/openai.py +++ b/src/pipecat/services/openai_realtime_beta/openai.py @@ -441,7 +441,7 @@ class OpenAIRealtimeBetaLLMService(LLMService): item = events.ConversationItem( type="function_call_output", call_id=frame.tool_call_id, - output=json.dumps(frame.result), + output=json.dumps(frame.result, ensure_ascii=False), ) await self.send_client_event(events.ConversationItemCreateEvent(item=item)) From 765fbeec630ef9ed2bf70bc8becdd80443aef4c8 Mon Sep 17 00:00:00 2001 From: Ali Alhoshaiyan Date: Thu, 15 Jan 2026 10:11:20 +0300 Subject: [PATCH 2/3] Add changelog --- changelog/3457.changed.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 changelog/3457.changed.md diff --git a/changelog/3457.changed.md b/changelog/3457.changed.md new file mode 100644 index 000000000..afd607d7a --- /dev/null +++ b/changelog/3457.changed.md @@ -0,0 +1,16 @@ +# Reduce Call Tool Result Context Size by Allowing UTF-8 in JSON Serialization + +This PR changes tool result serialization to prevent UTF-8 code points from being escaped during serialization. This drastically reduces the context size when returning a response that contains languages other than English. + +We have been running a monkey-patched version in production and it helped us improve the agent accuracy and control cost better. + +``` +>>> data = { "message": "أهلًا بالعالم" } +>>> json.dumps(data) +'{"message": "\\u0623\\u0647\\u0644\\u064b\\u0627 \\u0628\\u0627\\u0644\\u0639\\u0627\\u0644\\u0645"}' +>>> +>>> +>>> +>>> json.dumps(data, ensure_ascii=False) +'{"message": "أهلًا بالعالم"}' +``` From 1fe1f0f43929892c47379985ea854010ebd25e4a Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Thu, 12 Mar 2026 10:34:29 -0400 Subject: [PATCH 3/3] Apply ensure_ascii=False to remaining LLM services and fix changelog format --- changelog/3457.changed.md | 17 +---------------- src/pipecat/services/aws/nova_sonic/llm.py | 4 +++- src/pipecat/services/google/llm.py | 4 +++- src/pipecat/services/grok/realtime/llm.py | 2 +- src/pipecat/services/openai/realtime/llm.py | 2 +- 5 files changed, 9 insertions(+), 20 deletions(-) diff --git a/changelog/3457.changed.md b/changelog/3457.changed.md index afd607d7a..d0d82ad2d 100644 --- a/changelog/3457.changed.md +++ b/changelog/3457.changed.md @@ -1,16 +1 @@ -# Reduce Call Tool Result Context Size by Allowing UTF-8 in JSON Serialization - -This PR changes tool result serialization to prevent UTF-8 code points from being escaped during serialization. This drastically reduces the context size when returning a response that contains languages other than English. - -We have been running a monkey-patched version in production and it helped us improve the agent accuracy and control cost better. - -``` ->>> data = { "message": "أهلًا بالعالم" } ->>> json.dumps(data) -'{"message": "\\u0623\\u0647\\u0644\\u064b\\u0627 \\u0628\\u0627\\u0644\\u0639\\u0627\\u0644\\u0645"}' ->>> ->>> ->>> ->>> json.dumps(data, ensure_ascii=False) -'{"message": "أهلًا بالعالم"}' -``` +- Changed tool result JSON serialization to use `ensure_ascii=False`, preserving UTF-8 characters instead of escaping them. This reduces context size and token usage for non-English languages. diff --git a/src/pipecat/services/aws/nova_sonic/llm.py b/src/pipecat/services/aws/nova_sonic/llm.py index b1f56dbbd..0947ba1d7 100644 --- a/src/pipecat/services/aws/nova_sonic/llm.py +++ b/src/pipecat/services/aws/nova_sonic/llm.py @@ -1044,7 +1044,9 @@ class AWSNovaSonicLLMService(LLMService): "toolResult": { "promptName": self._prompt_name, "contentName": content_name, - "content": json.dumps(result) if isinstance(result, dict) else result, + "content": json.dumps(result, ensure_ascii=False) + if isinstance(result, dict) + else result, } } } diff --git a/src/pipecat/services/google/llm.py b/src/pipecat/services/google/llm.py index 698b9f97f..26ad46311 100644 --- a/src/pipecat/services/google/llm.py +++ b/src/pipecat/services/google/llm.py @@ -200,7 +200,9 @@ class GoogleAssistantContextAggregator(OpenAIAssistantContextAggregator): if message.role == "user": for part in message.parts: if part.function_response and part.function_response.id == tool_call_id: - part.function_response.response = {"value": json.dumps(result)} + part.function_response.response = { + "value": json.dumps(result, ensure_ascii=False) + } @dataclass diff --git a/src/pipecat/services/grok/realtime/llm.py b/src/pipecat/services/grok/realtime/llm.py index bfa192aad..6e37d21d2 100644 --- a/src/pipecat/services/grok/realtime/llm.py +++ b/src/pipecat/services/grok/realtime/llm.py @@ -939,7 +939,7 @@ class GrokRealtimeLLMService(LLMService): item = events.ConversationItem( type="function_call_output", call_id=tool_call_id, - output=json.dumps(result), + output=json.dumps(result, ensure_ascii=False), ) await self.send_client_event(events.ConversationItemCreateEvent(item=item)) diff --git a/src/pipecat/services/openai/realtime/llm.py b/src/pipecat/services/openai/realtime/llm.py index faaa19884..bd31369ae 100644 --- a/src/pipecat/services/openai/realtime/llm.py +++ b/src/pipecat/services/openai/realtime/llm.py @@ -1128,7 +1128,7 @@ class OpenAIRealtimeLLMService(LLMService): item = events.ConversationItem( type="function_call_output", call_id=tool_call_id, - output=json.dumps(result), + output=json.dumps(result, ensure_ascii=False), ) await self.send_client_event(events.ConversationItemCreateEvent(item=item))