Empirical testing showed the previous design — grafting a verbose re-invocation reminder into the payload's `description` field for started and intermediate messages — was actually making Nova Sonic *worse*, not better: more spurious re-invocations of the same tool, not fewer. Plausibly the long, instruction-shaped description text reads as content the model has to respond to, where a terse status update reads as ambient state. Replace the reminder grafting with a caller-supplied `template` keyword argument on `prepare_message_payload_for_realtime`. When `None` (the default), the payload is serialized to its canonical JSON form. When provided, `template.format(tool_call_id=…, status=…, result=…, description=…)` is applied. The template is honored across all kinds, so callers route per kind based on which wire channel they're using. Nova Sonic now defines its own bracketed plain-text template (`_ASYNC_TOOL_RESULT_TEXT_TEMPLATE`) and applies it on the cross-modal user-text channel (intermediate / final). The started path stays on raw JSON (the formal AWS tool-result channel requires valid JSON). A code comment at the template constant captures the empirical finding for the next person — short framing yields much better behavior, surprising as it sounds. Tests updated for the new template behavior across all kinds. Also reverts a stream-tool example sleep-duration tweak (20s → 10s) and adds a commented-out alternative in the function-calling-openai-async-stream example for parallel testing.
322 lines
13 KiB
Python
322 lines
13 KiB
Python
#
|
|
# Copyright (c) 2024-2026, Daily
|
|
#
|
|
# SPDX-License-Identifier: BSD 2-Clause License
|
|
#
|
|
|
|
import json
|
|
import unittest
|
|
|
|
from pipecat.processors.aggregators import async_tool_messages
|
|
|
|
# The parser tests intentionally exercise the parser via the canonical
|
|
# builders, so a drift between the two sides will surface as a parse failure
|
|
# in CI rather than as a silent contract break in production.
|
|
|
|
|
|
def _started_message(tool_call_id: str = "call_123") -> dict:
    """Return a "started" message for ``tool_call_id`` made by the canonical builder."""
    message = async_tool_messages.build_started_message(tool_call_id)
    return message
|
|
|
|
|
|
def _intermediate_message(
    tool_call_id: str = "call_123",
    result: str = '"intermediate-1"',
) -> dict:
    """Return an intermediate-result message made by the canonical builder."""
    message = async_tool_messages.build_intermediate_result_message(tool_call_id, result)
    return message
|
|
|
|
|
|
def _final_message(
    tool_call_id: str = "call_123",
    result: str = '"final-result"',
) -> dict:
    """Return a final-result message made by the canonical builder."""
    message = async_tool_messages.build_final_result_message(tool_call_id, result)
    return message
|
|
|
|
|
|
class TestParseMessage(unittest.TestCase):
    """Tests for ``parse_message``: canonical messages parse, look-alikes yield ``None``."""

    # --- Messages produced by the canonical builders -------------------------

    def test_parses_started(self):
        parsed = async_tool_messages.parse_message(_started_message("abc"))
        assert parsed is not None
        assert parsed.kind == "started"
        assert parsed.tool_call_id == "abc"
        assert parsed.status == "running"
        assert parsed.result is None
        assert "asynchronous task" in parsed.description

    def test_parses_intermediate(self):
        message = _intermediate_message("abc", '"hello"')
        parsed = async_tool_messages.parse_message(message)
        assert parsed is not None
        assert parsed.kind == "intermediate"
        assert parsed.tool_call_id == "abc"
        assert parsed.status == "running"
        assert parsed.result == '"hello"'

    def test_parses_final(self):
        message = _final_message("abc", '"done"')
        parsed = async_tool_messages.parse_message(message)
        assert parsed is not None
        assert parsed.kind == "final"
        assert parsed.tool_call_id == "abc"
        assert parsed.status == "finished"
        assert parsed.result == '"done"'

    def test_parses_completed_sentinel_result(self):
        # A function that returns nothing makes the aggregator report the
        # literal "COMPLETED" (the same convention as synchronous tool calls).
        # The parser has no special case for it; it is an ordinary string.
        message = _final_message("abc", "COMPLETED")
        parsed = async_tool_messages.parse_message(message)
        assert parsed is not None
        assert parsed.kind == "final"
        assert parsed.result == "COMPLETED"

    # --- Messages that are not async-tool messages ---------------------------

    def test_returns_none_for_regular_user_message(self):
        message = {"role": "user", "content": "hello"}
        assert async_tool_messages.parse_message(message) is None

    def test_returns_none_for_regular_assistant_message(self):
        message = {"role": "assistant", "content": "hi"}
        assert async_tool_messages.parse_message(message) is None

    def test_returns_none_for_regular_tool_message(self):
        # Plain string content: the IN_PROGRESS placeholder and an ordinary
        # tool result, checked in that order.
        for content in ("IN_PROGRESS", "weather: sunny"):
            message = {"role": "tool", "tool_call_id": "x", "content": content}
            assert async_tool_messages.parse_message(message) is None

    def test_returns_none_for_developer_message_without_payload(self):
        # The developer role is (potentially) used for things other than
        # async-tool messages, so the role alone must not be enough.
        message = {"role": "developer", "content": "some other developer note"}
        assert async_tool_messages.parse_message(message) is None

    def test_returns_none_for_invalid_json_content(self):
        message = {"role": "tool", "content": "{not json"}
        assert async_tool_messages.parse_message(message) is None

    def test_returns_none_for_non_dict_json(self):
        message = {"role": "tool", "content": "[1, 2, 3]"}
        assert async_tool_messages.parse_message(message) is None

    def test_returns_none_for_wrong_payload_type(self):
        content = json.dumps({"type": "something_else", "tool_call_id": "x"})
        message = {"role": "tool", "content": content}
        assert async_tool_messages.parse_message(message) is None

    def test_returns_none_when_tool_call_id_missing(self):
        content = json.dumps({"type": "async_tool", "status": "running"})
        message = {"role": "tool", "content": content}
        assert async_tool_messages.parse_message(message) is None

    def test_returns_none_when_status_invalid(self):
        content = json.dumps({"type": "async_tool", "tool_call_id": "x", "status": "weird"})
        message = {"role": "tool", "content": content}
        assert async_tool_messages.parse_message(message) is None

    def test_returns_none_for_non_string_content(self):
        # List-valued (multimodal) content is never an async-tool message.
        message = {"role": "tool", "content": [{"type": "text", "text": "hi"}]}
        assert async_tool_messages.parse_message(message) is None

    def test_returns_none_for_missing_role(self):
        message = {"content": "{}"}
        assert async_tool_messages.parse_message(message) is None
|
|
|
|
|
|
class TestBuilders(unittest.TestCase):
    """Builders emit the canonical payload shape and round-trip through the parser."""

    # --- Payload shape -------------------------------------------------------

    def test_started_message_shape(self):
        built = async_tool_messages.build_started_message("call_42")
        # The outer message is role=tool and carries tool_call_id, which lets
        # it sit next to ordinary tool messages in the context.
        assert built["role"] == "tool"
        assert built["tool_call_id"] == "call_42"
        body = json.loads(built["content"])
        assert body["type"] == "async_tool"
        assert body["status"] == "running"
        assert body["tool_call_id"] == "call_42"
        assert "result" not in body
        description = body["description"]
        assert isinstance(description, str) and description

    def test_intermediate_message_shape(self):
        built = async_tool_messages.build_intermediate_result_message("call_99", '"step-1"')
        # Intermediate and final messages are role=developer; tool_call_id
        # appears only inside the JSON payload, not on the outer message.
        assert built["role"] == "developer"
        assert "tool_call_id" not in built
        body = json.loads(built["content"])
        assert body["type"] == "async_tool"
        assert body["status"] == "running"
        assert body["tool_call_id"] == "call_99"
        assert body["result"] == '"step-1"'
        description = body["description"]
        assert isinstance(description, str) and description

    def test_final_message_shape(self):
        built = async_tool_messages.build_final_result_message("call_7", '"all-done"')
        assert built["role"] == "developer"
        assert "tool_call_id" not in built
        body = json.loads(built["content"])
        assert body["type"] == "async_tool"
        assert body["status"] == "finished"
        assert body["tool_call_id"] == "call_7"
        assert body["result"] == '"all-done"'
        description = body["description"]
        assert isinstance(description, str) and description

    def test_final_message_with_completed_sentinel(self):
        # When the function returned no value the aggregator hands over the
        # literal "COMPLETED" string (as it does for synchronous tool calls).
        # The builder applies no special handling: the value simply
        # round-trips as the result.
        built = async_tool_messages.build_final_result_message("call_1", "COMPLETED")
        body = json.loads(built["content"])
        assert body["result"] == "COMPLETED"
        parsed = async_tool_messages.parse_message(built)
        assert parsed is not None
        assert parsed.kind == "final"
        assert parsed.result == "COMPLETED"

    # --- Builder -> parser round trips ---------------------------------------

    def test_started_round_trip(self):
        built = async_tool_messages.build_started_message("call_x")
        parsed = async_tool_messages.parse_message(built)
        assert parsed is not None
        assert parsed.kind == "started"
        assert parsed.tool_call_id == "call_x"
        assert parsed.status == "running"
        assert parsed.result is None

    def test_intermediate_round_trip(self):
        built = async_tool_messages.build_intermediate_result_message("call_x", '{"step": 1}')
        parsed = async_tool_messages.parse_message(built)
        assert parsed is not None
        assert parsed.kind == "intermediate"
        assert parsed.tool_call_id == "call_x"
        assert parsed.status == "running"
        assert parsed.result == '{"step": 1}'

    def test_final_round_trip(self):
        built = async_tool_messages.build_final_result_message("call_x", '{"answer": 42}')
        parsed = async_tool_messages.parse_message(built)
        assert parsed is not None
        assert parsed.kind == "final"
        assert parsed.tool_call_id == "call_x"
        assert parsed.status == "finished"
        assert parsed.result == '{"answer": 42}'
|
|
|
|
|
|
class TestPrepareMessagePayloadForRealtime(unittest.TestCase):
    """Realtime payload preparation for every message kind, with and without a template."""

    # Template shared by the per-kind template tests below.
    _BRACKET_TEMPLATE = "[{tool_call_id} {status}] {result}"

    @staticmethod
    def _render(message, **options):
        """Parse ``message`` and return its realtime text, forwarding ``options`` as given."""
        parsed = async_tool_messages.parse_message(message)
        assert parsed is not None
        return async_tool_messages.prepare_message_payload_for_realtime(parsed, **options)

    # --- No template given: output is the payload as JSON --------------------

    def test_started_default_is_raw_json(self):
        rendered = self._render(async_tool_messages.build_started_message("call_42"))
        payload = json.loads(rendered)
        assert payload["type"] == "async_tool"
        assert payload["tool_call_id"] == "call_42"
        assert payload["status"] == "running"
        # A started payload has no result key at all.
        assert "result" not in payload

    def test_intermediate_default_is_raw_json(self):
        rendered = self._render(
            async_tool_messages.build_intermediate_result_message("call_42", '"step-1"')
        )
        payload = json.loads(rendered)
        assert payload["type"] == "async_tool"
        assert payload["tool_call_id"] == "call_42"
        assert payload["status"] == "running"
        assert payload["result"] == '"step-1"'

    def test_final_default_is_raw_json(self):
        rendered = self._render(
            async_tool_messages.build_final_result_message("call_42", '"the answer"')
        )
        payload = json.loads(rendered)
        assert payload["type"] == "async_tool"
        assert payload["tool_call_id"] == "call_42"
        assert payload["status"] == "finished"
        assert payload["result"] == '"the answer"'

    # --- Template given: it is applied whatever the kind ---------------------

    def test_template_applied_to_started(self):
        rendered = self._render(
            async_tool_messages.build_started_message("call_42"),
            template=self._BRACKET_TEMPLATE,
        )
        # No result exists for a started message, so nothing follows the
        # closing bracket except the template's own trailing space.
        assert rendered == "[call_42 running] "

    def test_template_applied_to_intermediate(self):
        rendered = self._render(
            async_tool_messages.build_intermediate_result_message("call_42", '"step-1"'),
            template=self._BRACKET_TEMPLATE,
        )
        assert rendered == '[call_42 running] "step-1"'

    def test_template_applied_to_final(self):
        rendered = self._render(
            async_tool_messages.build_final_result_message("call_42", '"the answer"'),
            template=self._BRACKET_TEMPLATE,
        )
        assert rendered == '[call_42 finished] "the answer"'

    def test_template_can_use_description_field(self):
        rendered = self._render(
            async_tool_messages.build_intermediate_result_message("call_42", '"step-1"'),
            template="{description} >> {result}",
        )
        # The intermediate description is substituted into the output as-is.
        assert "intermediate result" in rendered
        assert rendered.endswith('>> "step-1"')
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the test cases when this module is executed directly as a script.
    unittest.main()
|