test: add run_inference tests for OpenAIResponsesLLMService

Uses real LLMContext and adapter (only HTTP client is mocked) to test
basic inference, client exception propagation, system_instruction
override, empty context fallback, and max_tokens override.
This commit is contained in:
Paul Kompfner
2026-03-18 14:17:21 -04:00
parent a7167ad121
commit c4f21ef76b

View File

@@ -15,7 +15,6 @@ from pipecat.adapters.services.anthropic_adapter import AnthropicLLMInvocationPa
from pipecat.adapters.services.bedrock_adapter import AWSBedrockLLMInvocationParams
from pipecat.adapters.services.gemini_adapter import GeminiLLMInvocationParams
from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
from pipecat.adapters.services.open_ai_responses_adapter import OpenAIResponsesLLMInvocationParams
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.services.anthropic.llm import AnthropicLLMService
from pipecat.services.aws.llm import AWSBedrockLLMService
@@ -786,42 +785,26 @@ async def test_openai_responses_run_inference_with_llm_context():
)
service._client = AsyncMock()
# Setup mocks
mock_context = MagicMock(spec=LLMContext)
mock_adapter = MagicMock()
test_input = [
{"role": "developer", "content": "You are a helpful assistant"},
{"role": "user", "content": "Hello, world!"},
]
mock_adapter.get_llm_invocation_params.return_value = OpenAIResponsesLLMInvocationParams(
input=test_input,
tools=OPENAI_NOT_GIVEN,
instructions="You are a helpful assistant",
context = LLMContext(
messages=[
{"role": "user", "content": "Hello, world!"},
]
)
service.get_llm_adapter = MagicMock(return_value=mock_adapter)
# Mock response
mock_response = MagicMock()
mock_response.output_text = "Hello! How can I help you today?"
service._client.responses.create = AsyncMock(return_value=mock_response)
# Execute
result = await service.run_inference(mock_context)
result = await service.run_inference(context)
# Verify
assert result == "Hello! How can I help you today?"
service.get_llm_adapter.assert_called_once()
mock_adapter.get_llm_invocation_params.assert_called_once_with(
mock_context, system_instruction="You are a helpful assistant"
)
service._client.responses.create.assert_called_once_with(
model="gpt-4.1",
stream=False,
input=test_input,
instructions="You are a helpful assistant",
temperature=0.7,
max_output_tokens=100,
)
call_kwargs = service._client.responses.create.call_args.kwargs
assert call_kwargs["model"] == "gpt-4.1"
assert call_kwargs["stream"] is False
assert call_kwargs["input"] == [{"role": "user", "content": "Hello, world!"}]
assert call_kwargs["instructions"] == "You are a helpful assistant"
assert call_kwargs["temperature"] == 0.7
assert call_kwargs["max_output_tokens"] == 100
@pytest.mark.asyncio
@@ -831,16 +814,11 @@ async def test_openai_responses_run_inference_client_exception():
service = OpenAIResponsesLLMService()
service._client = AsyncMock()
mock_context = MagicMock(spec=LLMContext)
mock_adapter = MagicMock()
mock_adapter.get_llm_invocation_params.return_value = OpenAIResponsesLLMInvocationParams(
input=[], tools=OPENAI_NOT_GIVEN
)
service.get_llm_adapter = MagicMock(return_value=mock_adapter)
context = LLMContext(messages=[{"role": "user", "content": "Hello"}])
service._client.responses.create = AsyncMock(side_effect=Exception("API Error"))
with pytest.raises(Exception, match="API Error"):
await service.run_inference(mock_context)
await service.run_inference(context)
@pytest.mark.asyncio
@@ -855,32 +833,47 @@ async def test_openai_responses_run_inference_system_instruction_overrides():
)
service._client = AsyncMock()
mock_context = MagicMock(spec=LLMContext)
mock_adapter = MagicMock()
test_input = [{"role": "user", "content": "Hello"}]
mock_adapter.get_llm_invocation_params.return_value = OpenAIResponsesLLMInvocationParams(
input=test_input,
tools=OPENAI_NOT_GIVEN,
instructions="New system instruction",
context = LLMContext(
messages=[{"role": "user", "content": "Hello"}],
)
service.get_llm_adapter = MagicMock(return_value=mock_adapter)
mock_response = MagicMock()
mock_response.output_text = "Response"
service._client.responses.create = AsyncMock(return_value=mock_response)
result = await service.run_inference(
mock_context, system_instruction="New system instruction"
)
result = await service.run_inference(context, system_instruction="New system instruction")
assert result == "Response"
# The adapter should have been called with the override instruction
mock_adapter.get_llm_invocation_params.assert_called_once_with(
mock_context, system_instruction="New system instruction"
)
# The final API call should have the override instruction
call_kwargs = service._client.responses.create.call_args.kwargs
assert call_kwargs["instructions"] == "New system instruction"
assert call_kwargs["input"] == [{"role": "user", "content": "Hello"}]
@pytest.mark.asyncio
async def test_openai_responses_run_inference_empty_context_with_instruction():
    """Check that an empty context sends the instruction as a developer message.

    The service is configured with a ``system_instruction`` and run against an
    ``LLMContext`` holding no messages. The keyword arguments handed to the
    mocked ``responses.create`` must carry the instruction as the only
    ``developer`` input item and must not contain an ``instructions`` key.
    """
    # NOTE(review): the extent of this ``with`` block was reconstructed from a
    # flattened diff; confirm against the repository layout.
    # ``_create_client`` is patched out, presumably so that constructing the
    # service does not build a real API client.
    with patch.object(OpenAIResponsesLLMService, "_create_client"):
        settings = OpenAIResponsesLLMService.Settings(
            model="gpt-4.1",
            system_instruction="You are helpful",
        )
        llm = OpenAIResponsesLLMService(settings=settings)

        # Only the HTTP client is replaced; the context is a real LLMContext.
        llm._client = AsyncMock()
        create_mock = AsyncMock(return_value=MagicMock(output_text="Response"))
        llm._client.responses.create = create_mock

        empty_context = LLMContext(messages=[])
        reply = await llm.run_inference(empty_context)

        assert reply == "Response"

        sent = create_mock.call_args.kwargs
        # With empty context, instruction should become a developer message
        assert sent["input"] == [{"role": "developer", "content": "You are helpful"}]
        assert "instructions" not in sent
@pytest.mark.asyncio
@@ -895,21 +888,16 @@ async def test_openai_responses_run_inference_max_tokens_override():
)
service._client = AsyncMock()
mock_context = MagicMock(spec=LLMContext)
mock_adapter = MagicMock()
test_input = [{"role": "user", "content": "Summarize this"}]
mock_adapter.get_llm_invocation_params.return_value = OpenAIResponsesLLMInvocationParams(
input=test_input, tools=OPENAI_NOT_GIVEN
context = LLMContext(
messages=[{"role": "user", "content": "Summarize this"}],
)
service.get_llm_adapter = MagicMock(return_value=mock_adapter)
mock_response = MagicMock()
mock_response.output_text = "Summary"
service._client.responses.create = AsyncMock(return_value=mock_response)
result = await service.run_inference(mock_context, max_tokens=200)
result = await service.run_inference(context, max_tokens=200)
assert result == "Summary"
call_kwargs = service._client.responses.create.call_args.kwargs
# max_tokens override should take precedence
assert call_kwargs["max_output_tokens"] == 200