From c4f21ef76ba9d15b6fae045f6870cf1d789b6b2a Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 18 Mar 2026 14:17:21 -0400 Subject: [PATCH] test: add run_inference tests for OpenAIResponsesLLMService Uses real LLMContext and adapter (only HTTP client is mocked) to test basic inference, client exception propagation, system_instruction override, empty context fallback, and max_tokens override. --- tests/test_run_inference.py | 108 ++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 60 deletions(-) diff --git a/tests/test_run_inference.py b/tests/test_run_inference.py index 4b35aee2b..e01d7b36d 100644 --- a/tests/test_run_inference.py +++ b/tests/test_run_inference.py @@ -15,7 +15,6 @@ from pipecat.adapters.services.anthropic_adapter import AnthropicLLMInvocationPa from pipecat.adapters.services.bedrock_adapter import AWSBedrockLLMInvocationParams from pipecat.adapters.services.gemini_adapter import GeminiLLMInvocationParams from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams -from pipecat.adapters.services.open_ai_responses_adapter import OpenAIResponsesLLMInvocationParams from pipecat.processors.aggregators.llm_context import LLMContext from pipecat.services.anthropic.llm import AnthropicLLMService from pipecat.services.aws.llm import AWSBedrockLLMService @@ -786,42 +785,26 @@ async def test_openai_responses_run_inference_with_llm_context(): ) service._client = AsyncMock() - # Setup mocks - mock_context = MagicMock(spec=LLMContext) - mock_adapter = MagicMock() - test_input = [ - {"role": "developer", "content": "You are a helpful assistant"}, - {"role": "user", "content": "Hello, world!"}, - ] - mock_adapter.get_llm_invocation_params.return_value = OpenAIResponsesLLMInvocationParams( - input=test_input, - tools=OPENAI_NOT_GIVEN, - instructions="You are a helpful assistant", + context = LLMContext( + messages=[ + {"role": "user", "content": "Hello, world!"}, + ] ) - service.get_llm_adapter = MagicMock(return_value=mock_adapter) - # Mock response mock_response = MagicMock() mock_response.output_text = "Hello! How can I help you today?" service._client.responses.create = AsyncMock(return_value=mock_response) - # Execute - result = await service.run_inference(mock_context) + result = await service.run_inference(context) - # Verify assert result == "Hello! How can I help you today?" - service.get_llm_adapter.assert_called_once() - mock_adapter.get_llm_invocation_params.assert_called_once_with( - mock_context, system_instruction="You are a helpful assistant" - ) - service._client.responses.create.assert_called_once_with( - model="gpt-4.1", - stream=False, - input=test_input, - instructions="You are a helpful assistant", - temperature=0.7, - max_output_tokens=100, - ) + call_kwargs = service._client.responses.create.call_args.kwargs + assert call_kwargs["model"] == "gpt-4.1" + assert call_kwargs["stream"] is False + assert call_kwargs["input"] == [{"role": "user", "content": "Hello, world!"}] + assert call_kwargs["instructions"] == "You are a helpful assistant" + assert call_kwargs["temperature"] == 0.7 + assert call_kwargs["max_output_tokens"] == 100 @pytest.mark.asyncio @@ -831,16 +814,11 @@ async def test_openai_responses_run_inference_client_exception(): service = OpenAIResponsesLLMService() service._client = AsyncMock() - mock_context = MagicMock(spec=LLMContext) - mock_adapter = MagicMock() - mock_adapter.get_llm_invocation_params.return_value = OpenAIResponsesLLMInvocationParams( - input=[], tools=OPENAI_NOT_GIVEN - ) - service.get_llm_adapter = MagicMock(return_value=mock_adapter) + context = LLMContext(messages=[{"role": "user", "content": "Hello"}]) service._client.responses.create = AsyncMock(side_effect=Exception("API Error")) with pytest.raises(Exception, match="API Error"): - await service.run_inference(mock_context) + await service.run_inference(context) @pytest.mark.asyncio @@ -855,32 +833,47 @@ async def test_openai_responses_run_inference_system_instruction_overrides(): ) service._client = AsyncMock() - mock_context = MagicMock(spec=LLMContext) - mock_adapter = MagicMock() - test_input = [{"role": "user", "content": "Hello"}] - mock_adapter.get_llm_invocation_params.return_value = OpenAIResponsesLLMInvocationParams( - input=test_input, - tools=OPENAI_NOT_GIVEN, - instructions="New system instruction", + context = LLMContext( + messages=[{"role": "user", "content": "Hello"}], ) - service.get_llm_adapter = MagicMock(return_value=mock_adapter) mock_response = MagicMock() mock_response.output_text = "Response" service._client.responses.create = AsyncMock(return_value=mock_response) - result = await service.run_inference( - mock_context, system_instruction="New system instruction" - ) + result = await service.run_inference(context, system_instruction="New system instruction") assert result == "Response" - # The adapter should have been called with the override instruction - mock_adapter.get_llm_invocation_params.assert_called_once_with( - mock_context, system_instruction="New system instruction" - ) - # The final API call should have the override instruction call_kwargs = service._client.responses.create.call_args.kwargs assert call_kwargs["instructions"] == "New system instruction" + assert call_kwargs["input"] == [{"role": "user", "content": "Hello"}] + + +@pytest.mark.asyncio +async def test_openai_responses_run_inference_empty_context_with_instruction(): + """Test that system_instruction becomes a developer message when context is empty.""" + with patch.object(OpenAIResponsesLLMService, "_create_client"): + service = OpenAIResponsesLLMService( + settings=OpenAIResponsesLLMService.Settings( + model="gpt-4.1", + system_instruction="You are helpful", + ), + ) + service._client = AsyncMock() + + context = LLMContext(messages=[]) + + mock_response = MagicMock() + mock_response.output_text = "Response" + service._client.responses.create = AsyncMock(return_value=mock_response) + + result = await service.run_inference(context) + + assert result == "Response" + call_kwargs = service._client.responses.create.call_args.kwargs + # With empty context, instruction should become a developer message + assert call_kwargs["input"] == [{"role": "developer", "content": "You are helpful"}] + assert "instructions" not in call_kwargs @pytest.mark.asyncio @@ -895,21 +888,16 @@ async def test_openai_responses_run_inference_max_tokens_override(): ) service._client = AsyncMock() - mock_context = MagicMock(spec=LLMContext) - mock_adapter = MagicMock() - test_input = [{"role": "user", "content": "Summarize this"}] - mock_adapter.get_llm_invocation_params.return_value = OpenAIResponsesLLMInvocationParams( - input=test_input, tools=OPENAI_NOT_GIVEN + context = LLMContext( + messages=[{"role": "user", "content": "Summarize this"}], ) - service.get_llm_adapter = MagicMock(return_value=mock_adapter) mock_response = MagicMock() mock_response.output_text = "Summary" service._client.responses.create = AsyncMock(return_value=mock_response) - result = await service.run_inference(mock_context, max_tokens=200) + result = await service.run_inference(context, max_tokens=200) assert result == "Summary" call_kwargs = service._client.responses.create.call_args.kwargs - # max_tokens override should take precedence assert call_kwargs["max_output_tokens"] == 200