907 lines
37 KiB
Python
907 lines
37 KiB
Python
#
|
|
# Copyright (c) 2024-2026, Daily
|
|
#
|
|
# SPDX-License-Identifier: BSD 2-Clause License
|
|
#
|
|
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
from anthropic import NOT_GIVEN
|
|
from openai import NotGiven
|
|
from openai._types import NOT_GIVEN as OPENAI_NOT_GIVEN
|
|
|
|
from pipecat.adapters.services.anthropic_adapter import AnthropicLLMInvocationParams
|
|
from pipecat.adapters.services.bedrock_adapter import AWSBedrockLLMInvocationParams
|
|
from pipecat.adapters.services.gemini_adapter import GeminiLLMInvocationParams
|
|
from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
|
|
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
from pipecat.services.anthropic.llm import AnthropicLLMService
|
|
from pipecat.services.aws.llm import AWSBedrockLLMService
|
|
from pipecat.services.google.llm import GoogleLLMService
|
|
from pipecat.services.openai.llm import OpenAILLMService
|
|
from pipecat.services.openai.responses.llm import (
|
|
OpenAIResponsesHttpLLMService,
|
|
OpenAIResponsesLLMService,
|
|
)
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_openai_run_inference_with_llm_context():
    """Verify OpenAI run_inference returns the reply and forwards configured params."""
    with patch.object(OpenAILLMService, "create_client"):
        from pipecat.services.openai.base_llm import BaseOpenAILLMService

        # Service under test: explicit sampling parameters, mocked client.
        input_params = BaseOpenAILLMService.InputParams(
            temperature=0.7, max_tokens=100, frequency_penalty=0.5, seed=42
        )
        llm = OpenAILLMService(model="gpt-4", params=input_params)
        llm._client = AsyncMock()

        # Stub the adapter so the test dictates the invocation params.
        conversation = [
            {"role": "system", "content": "You are a helpful assistant"},
            {"role": "user", "content": "Hello, world!"},
        ]
        adapter = MagicMock()
        adapter.get_llm_invocation_params.return_value = OpenAILLMInvocationParams(
            messages=conversation, tools=OPENAI_NOT_GIVEN, tool_choice=OPENAI_NOT_GIVEN
        )
        llm.get_llm_adapter = MagicMock(return_value=adapter)
        context = MagicMock(spec=LLMContext)

        # Canned chat-completion carrying a single choice.
        choice = MagicMock()
        choice.message.content = "Hello! How can I help you today?"
        completion = MagicMock()
        completion.choices = [choice]
        llm._client.chat.completions.create.return_value = completion

        reply = await llm.run_inference(context)

        assert reply == "Hello! How can I help you today?"
        llm.get_llm_adapter.assert_called_once()
        # convert_developer_to_user=False because OpenAILLMService.supports_developer_role is True
        adapter.get_llm_invocation_params.assert_called_once_with(
            context, system_instruction=None, convert_developer_to_user=False
        )

        # Configured values are forwarded; everything left unset stays NOT_GIVEN.
        expected_request = {
            "model": "gpt-4",
            "stream": False,
            "frequency_penalty": 0.5,
            "presence_penalty": OPENAI_NOT_GIVEN,
            "seed": 42,
            "temperature": 0.7,
            "top_p": OPENAI_NOT_GIVEN,
            "max_tokens": 100,
            "max_completion_tokens": OPENAI_NOT_GIVEN,
            "service_tier": OPENAI_NOT_GIVEN,
            "messages": conversation,
            "tools": OPENAI_NOT_GIVEN,
            "tool_choice": OPENAI_NOT_GIVEN,
        }
        llm._client.chat.completions.create.assert_called_once_with(**expected_request)
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_openai_run_inference_client_exception():
    """Verify an error raised by the OpenAI client surfaces from run_inference."""
    with patch.object(OpenAILLMService, "create_client"):
        llm = OpenAILLMService(model="gpt-4")
        llm._client = AsyncMock()
        # Make the chat-completions call fail.
        llm._client.chat.completions.create.side_effect = Exception("API Error")

        adapter = MagicMock()
        adapter.get_llm_invocation_params.return_value = OpenAILLMInvocationParams(
            messages=[], tools=OPENAI_NOT_GIVEN, tool_choice=OPENAI_NOT_GIVEN
        )
        llm.get_llm_adapter = MagicMock(return_value=adapter)
        context = MagicMock(spec=LLMContext)

        with pytest.raises(Exception, match="API Error"):
            await llm.run_inference(context)
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_anthropic_run_inference_with_llm_context():
    """Verify Anthropic run_inference returns the reply and forwards configured params.

    AnthropicLLMService comes from the module-level import; the former
    function-scope re-import of the same name was redundant and is dropped.
    """
    # Service under test: explicit sampling parameters, mocked client.
    input_params = AnthropicLLMService.InputParams(
        max_tokens=2048, temperature=0.6, top_k=50, top_p=0.95
    )
    llm = AnthropicLLMService(
        api_key="test-key", model="claude-3-sonnet-20240229", params=input_params
    )
    llm._client = AsyncMock()

    # Stub the adapter so the test dictates the invocation params.
    conversation = [{"role": "user", "content": "Hello, world!"}]
    system_prompt = "You are a helpful assistant"
    adapter = MagicMock()
    adapter.get_llm_invocation_params.return_value = AnthropicLLMInvocationParams(
        messages=conversation, system=system_prompt, tools=[]
    )
    llm.get_llm_adapter = MagicMock(return_value=adapter)
    context = MagicMock(spec=LLMContext)

    # Canned response with a single text content block.
    text_block = MagicMock()
    text_block.text = "Hello! How can I help you today?"
    api_response = MagicMock()
    api_response.content = [text_block]
    llm._client.beta.messages.create.return_value = api_response

    reply = await llm.run_inference(context)

    assert reply == "Hello! How can I help you today?"
    llm.get_llm_adapter.assert_called_once()
    adapter.get_llm_invocation_params.assert_called_once_with(
        context, enable_prompt_caching=False, system_instruction=None
    )
    llm._client.beta.messages.create.assert_called_once_with(
        model="claude-3-sonnet-20240229",
        max_tokens=2048,
        stream=False,
        temperature=0.6,
        top_k=50,
        top_p=0.95,
        messages=conversation,
        system=system_prompt,
        tools=[],
        betas=["interleaved-thinking-2025-05-14"],
    )
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_anthropic_run_inference_client_exception():
    """Verify an error raised by the Anthropic client surfaces from run_inference."""
    llm = AnthropicLLMService(api_key="test-key", model="claude-3-sonnet-20240229")
    llm._client = AsyncMock()
    # Make the messages call fail.
    llm._client.beta.messages.create.side_effect = Exception("Anthropic API Error")

    adapter = MagicMock()
    adapter.get_llm_invocation_params.return_value = AnthropicLLMInvocationParams(
        messages=[], system="Test system", tools=[]
    )
    llm.get_llm_adapter = MagicMock(return_value=adapter)
    context = MagicMock(spec=LLMContext)

    with pytest.raises(Exception, match="Anthropic API Error"):
        await llm.run_inference(context)
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_google_run_inference_with_llm_context():
    """Verify Google run_inference returns the text of the first candidate part."""
    llm = GoogleLLMService(api_key="test-key", model="gemini-2.0-flash")
    llm._client = AsyncMock()

    # Stub the adapter so the test dictates the invocation params.
    conversation = [{"role": "user", "content": "Hello, world!"}]
    system_prompt = "You are a helpful assistant"
    adapter = MagicMock()
    adapter.get_llm_invocation_params.return_value = GeminiLLMInvocationParams(
        messages=conversation, system_instruction=system_prompt, tools=NotGiven()
    )
    llm.get_llm_adapter = MagicMock(return_value=adapter)
    context = MagicMock(spec=LLMContext)

    # Canned response: one candidate whose content has one text part.
    part = MagicMock()
    part.text = "Hello! How can I help you today?"
    candidate = MagicMock()
    candidate.content = MagicMock()
    candidate.content.parts = [part]
    api_response = MagicMock()
    api_response.candidates = [candidate]

    # Wire the async client path aio.models.generate_content.
    llm._client.aio = AsyncMock()
    llm._client.aio.models = AsyncMock()
    llm._client.aio.models.generate_content = AsyncMock(return_value=api_response)

    reply = await llm.run_inference(context)

    assert reply == "Hello! How can I help you today?"
    llm.get_llm_adapter.assert_called_once()
    adapter.get_llm_invocation_params.assert_called_once_with(
        context, system_instruction=None
    )
    llm._client.aio.models.generate_content.assert_called_once()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_google_run_inference_client_exception():
    """Verify an error raised by the Google client surfaces from run_inference."""
    llm = GoogleLLMService(api_key="test-key", model="gemini-2.0-flash")
    llm._client = AsyncMock()

    adapter = MagicMock()
    adapter.get_llm_invocation_params.return_value = GeminiLLMInvocationParams(
        messages=[], system_instruction="Test system", tools=NotGiven()
    )
    llm.get_llm_adapter = MagicMock(return_value=adapter)
    context = MagicMock(spec=LLMContext)

    # Make generate_content fail when awaited.
    failing_call = AsyncMock(side_effect=Exception("Google API Error"))
    llm._client.aio = AsyncMock()
    llm._client.aio.models = AsyncMock()
    llm._client.aio.models.generate_content = failing_call

    with pytest.raises(Exception, match="Google API Error"):
        await llm.run_inference(context)
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_aws_bedrock_run_inference_with_llm_context():
    """Verify Bedrock run_inference returns the reply and forwards configured params.

    AWSBedrockLLMService comes from the module-level import; the former
    function-scope re-import of the same name was redundant and is dropped.
    """
    # Service under test with explicit inference parameters.
    input_params = AWSBedrockLLMService.InputParams(max_tokens=1024, temperature=0.5, top_p=0.85)
    llm = AWSBedrockLLMService(
        model="anthropic.claude-3-sonnet-20240229-v1:0", params=input_params
    )

    # Stub the adapter so the test dictates the invocation params.
    conversation = [{"role": "user", "content": [{"text": "Hello, world!"}]}]
    system_blocks = [{"text": "You are a helpful assistant"}]
    adapter = MagicMock()
    adapter.get_llm_invocation_params.return_value = AWSBedrockLLMInvocationParams(
        messages=conversation, system=system_blocks, tools=[], tool_choice=None
    )
    llm.get_llm_adapter = MagicMock(return_value=adapter)
    context = MagicMock(spec=LLMContext)

    # Fake Bedrock client whose converse() yields a canned response.
    bedrock_client = AsyncMock()
    bedrock_client.converse.return_value = {
        "output": {"message": {"content": [{"text": "Hello! How can I help you today?"}]}}
    }

    # _aws_session.client(...) is used as an async context manager, so the
    # patched return value must provide __aenter__/__aexit__.
    session_cm = AsyncMock()
    session_cm.__aenter__ = AsyncMock(return_value=bedrock_client)
    session_cm.__aexit__ = AsyncMock(return_value=None)

    with patch.object(llm._aws_session, "client", return_value=session_cm):
        reply = await llm.run_inference(context)

        assert reply == "Hello! How can I help you today?"
        llm.get_llm_adapter.assert_called_once()
        adapter.get_llm_invocation_params.assert_called_once_with(
            context, system_instruction=None
        )

        # The converse() request must carry the configured parameters.
        sent = bedrock_client.converse.call_args.kwargs
        assert sent["modelId"] == "anthropic.claude-3-sonnet-20240229-v1:0"
        assert sent["messages"] == conversation
        assert sent["system"] == system_blocks
        assert sent["additionalModelRequestFields"] == {}
        assert "inferenceConfig" in sent
        inference_config = sent["inferenceConfig"]
        assert inference_config["maxTokens"] == 1024
        assert inference_config["temperature"] == 0.5
        assert inference_config["topP"] == 0.85
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_aws_bedrock_run_inference_client_exception():
    """Verify an error raised by the Bedrock client surfaces from run_inference."""
    llm = AWSBedrockLLMService(model="anthropic.claude-3-sonnet-20240229-v1:0")

    adapter = MagicMock()
    adapter.get_llm_invocation_params.return_value = AWSBedrockLLMInvocationParams(
        messages=[], system=[{"text": "Test system"}], tools=[], tool_choice=None
    )
    llm.get_llm_adapter = MagicMock(return_value=adapter)
    context = MagicMock(spec=LLMContext)

    # Fake Bedrock client whose converse() fails.
    bedrock_client = AsyncMock()
    bedrock_client.converse.side_effect = Exception("Bedrock API Error")

    # _aws_session.client(...) is used as an async context manager.
    session_cm = AsyncMock()
    session_cm.__aenter__ = AsyncMock(return_value=bedrock_client)
    session_cm.__aexit__ = AsyncMock(return_value=None)

    session_patch = patch.object(llm._aws_session, "client", return_value=session_cm)
    with session_patch, pytest.raises(Exception, match="Bedrock API Error"):
        await llm.run_inference(context)
|
|
|
|
|
|
# --- system_instruction parameter tests ---
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_openai_run_inference_system_instruction_overrides_context():
    """Verify an explicit system_instruction is handed to the OpenAI adapter."""
    with patch.object(OpenAILLMService, "create_client"):
        llm = OpenAILLMService(model="gpt-4")
        llm._client = AsyncMock()

        conversation = [
            {"role": "system", "content": "Original system message"},
            {"role": "user", "content": "Hello"},
        ]
        adapter = MagicMock()
        adapter.get_llm_invocation_params.return_value = OpenAILLMInvocationParams(
            messages=conversation, tools=OPENAI_NOT_GIVEN, tool_choice=OPENAI_NOT_GIVEN
        )
        llm.get_llm_adapter = MagicMock(return_value=adapter)
        context = MagicMock(spec=LLMContext)

        choice = MagicMock()
        choice.message.content = "Response"
        completion = MagicMock()
        completion.choices = [choice]
        llm._client.chat.completions.create.return_value = completion

        reply = await llm.run_inference(context, system_instruction="New system instruction")

        assert reply == "Response"
        # The override must reach the adapter unchanged.
        # convert_developer_to_user=False because OpenAILLMService.supports_developer_role is True.
        adapter.get_llm_invocation_params.assert_called_once_with(
            context,
            system_instruction="New system instruction",
            convert_developer_to_user=False,
        )
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_openai_run_inference_system_instruction_none_unchanged():
    """Verify adapter messages reach the OpenAI client as-is without a system_instruction."""
    with patch.object(OpenAILLMService, "create_client"):
        llm = OpenAILLMService(model="gpt-4")
        llm._client = AsyncMock()

        conversation = [
            {"role": "system", "content": "Original system message"},
            {"role": "user", "content": "Hello"},
        ]
        adapter = MagicMock()
        adapter.get_llm_invocation_params.return_value = OpenAILLMInvocationParams(
            messages=conversation, tools=OPENAI_NOT_GIVEN, tool_choice=OPENAI_NOT_GIVEN
        )
        llm.get_llm_adapter = MagicMock(return_value=adapter)
        context = MagicMock(spec=LLMContext)

        choice = MagicMock()
        choice.message.content = "Response"
        completion = MagicMock()
        completion.choices = [choice]
        llm._client.chat.completions.create.return_value = completion

        reply = await llm.run_inference(context)

        assert reply == "Response"
        # Inspect what was actually sent to the chat-completions endpoint.
        sent_messages = llm._client.chat.completions.create.call_args.kwargs["messages"]
        assert sent_messages[0] == {"role": "system", "content": "Original system message"}
        assert sent_messages[1] == {"role": "user", "content": "Hello"}
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_anthropic_run_inference_system_instruction_overrides_context():
    """Verify an explicit system_instruction is handed to the Anthropic adapter."""
    llm = AnthropicLLMService(api_key="test-key", model="claude-3-sonnet-20240229")
    llm._client = AsyncMock()

    conversation = [{"role": "user", "content": "Hello"}]
    adapter = MagicMock()
    adapter.get_llm_invocation_params.return_value = AnthropicLLMInvocationParams(
        messages=conversation, system="Original system", tools=[]
    )
    llm.get_llm_adapter = MagicMock(return_value=adapter)
    context = MagicMock(spec=LLMContext)

    text_block = MagicMock()
    text_block.text = "Response"
    api_response = MagicMock()
    api_response.content = [text_block]
    llm._client.beta.messages.create.return_value = api_response

    reply = await llm.run_inference(context, system_instruction="New system instruction")

    assert reply == "Response"
    # The override must reach the adapter unchanged.
    adapter.get_llm_invocation_params.assert_called_once_with(
        context,
        enable_prompt_caching=False,
        system_instruction="New system instruction",
    )
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_anthropic_run_inference_system_instruction_none_unchanged():
    """Verify the adapter's system prompt reaches Anthropic as-is without an override."""
    llm = AnthropicLLMService(api_key="test-key", model="claude-3-sonnet-20240229")
    llm._client = AsyncMock()

    conversation = [{"role": "user", "content": "Hello"}]
    adapter = MagicMock()
    adapter.get_llm_invocation_params.return_value = AnthropicLLMInvocationParams(
        messages=conversation, system="Original system", tools=[]
    )
    llm.get_llm_adapter = MagicMock(return_value=adapter)
    context = MagicMock(spec=LLMContext)

    text_block = MagicMock()
    text_block.text = "Response"
    api_response = MagicMock()
    api_response.content = [text_block]
    llm._client.beta.messages.create.return_value = api_response

    reply = await llm.run_inference(context)

    assert reply == "Response"
    # Inspect what was actually sent to the messages endpoint.
    sent = llm._client.beta.messages.create.call_args.kwargs
    assert sent["system"] == "Original system"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_google_run_inference_system_instruction_overrides_context():
    """Verify an explicit system_instruction is handed to the Google adapter."""
    llm = GoogleLLMService(api_key="test-key", model="gemini-2.0-flash")
    llm._client = AsyncMock()

    conversation = [{"role": "user", "content": "Hello"}]
    adapter = MagicMock()
    adapter.get_llm_invocation_params.return_value = GeminiLLMInvocationParams(
        messages=conversation, system_instruction="Original system", tools=NotGiven()
    )
    llm.get_llm_adapter = MagicMock(return_value=adapter)
    context = MagicMock(spec=LLMContext)

    # Canned response: one candidate whose content has one text part.
    part = MagicMock()
    part.text = "Response"
    candidate = MagicMock()
    candidate.content = MagicMock()
    candidate.content.parts = [part]
    api_response = MagicMock()
    api_response.candidates = [candidate]

    llm._client.aio = AsyncMock()
    llm._client.aio.models = AsyncMock()
    llm._client.aio.models.generate_content = AsyncMock(return_value=api_response)

    reply = await llm.run_inference(context, system_instruction="New system instruction")

    assert reply == "Response"
    # The override must reach the adapter unchanged.
    adapter.get_llm_invocation_params.assert_called_once_with(
        context, system_instruction="New system instruction"
    )
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_google_run_inference_system_instruction_none_unchanged():
    """Verify the adapter's system instruction reaches Google as-is without an override."""
    llm = GoogleLLMService(api_key="test-key", model="gemini-2.0-flash")
    llm._client = AsyncMock()

    conversation = [{"role": "user", "content": "Hello"}]
    adapter = MagicMock()
    adapter.get_llm_invocation_params.return_value = GeminiLLMInvocationParams(
        messages=conversation, system_instruction="Original system", tools=NotGiven()
    )
    llm.get_llm_adapter = MagicMock(return_value=adapter)
    context = MagicMock(spec=LLMContext)

    # Canned response: one candidate whose content has one text part.
    part = MagicMock()
    part.text = "Response"
    candidate = MagicMock()
    candidate.content = MagicMock()
    candidate.content.parts = [part]
    api_response = MagicMock()
    api_response.candidates = [candidate]

    llm._client.aio = AsyncMock()
    llm._client.aio.models = AsyncMock()
    llm._client.aio.models.generate_content = AsyncMock(return_value=api_response)

    reply = await llm.run_inference(context)

    assert reply == "Response"
    # Inspect the config object passed to generate_content.
    sent = llm._client.aio.models.generate_content.call_args.kwargs
    assert sent["config"].system_instruction == "Original system"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_aws_bedrock_run_inference_system_instruction_overrides_context():
    """Verify an explicit system_instruction is handed to the Bedrock adapter."""
    llm = AWSBedrockLLMService(model="anthropic.claude-3-sonnet-20240229-v1:0")

    conversation = [{"role": "user", "content": [{"text": "Hello"}]}]
    adapter = MagicMock()
    adapter.get_llm_invocation_params.return_value = AWSBedrockLLMInvocationParams(
        messages=conversation,
        system=[{"text": "Original system"}],
        tools=[],
        tool_choice=None,
    )
    llm.get_llm_adapter = MagicMock(return_value=adapter)
    context = MagicMock(spec=LLMContext)

    # Fake Bedrock client whose converse() yields a canned response.
    bedrock_client = AsyncMock()
    bedrock_client.converse.return_value = {
        "output": {"message": {"content": [{"text": "Response"}]}}
    }

    # _aws_session.client(...) is used as an async context manager.
    session_cm = AsyncMock()
    session_cm.__aenter__ = AsyncMock(return_value=bedrock_client)
    session_cm.__aexit__ = AsyncMock(return_value=None)

    with patch.object(llm._aws_session, "client", return_value=session_cm):
        reply = await llm.run_inference(context, system_instruction="New system instruction")

        assert reply == "Response"
        # The override must reach the adapter unchanged.
        adapter.get_llm_invocation_params.assert_called_once_with(
            context, system_instruction="New system instruction"
        )
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_aws_bedrock_run_inference_system_instruction_none_unchanged():
    """Verify the adapter's system blocks reach Bedrock as-is without an override."""
    llm = AWSBedrockLLMService(model="anthropic.claude-3-sonnet-20240229-v1:0")

    conversation = [{"role": "user", "content": [{"text": "Hello"}]}]
    adapter = MagicMock()
    adapter.get_llm_invocation_params.return_value = AWSBedrockLLMInvocationParams(
        messages=conversation,
        system=[{"text": "Original system"}],
        tools=[],
        tool_choice=None,
    )
    llm.get_llm_adapter = MagicMock(return_value=adapter)
    context = MagicMock(spec=LLMContext)

    # Fake Bedrock client whose converse() yields a canned response.
    bedrock_client = AsyncMock()
    bedrock_client.converse.return_value = {
        "output": {"message": {"content": [{"text": "Response"}]}}
    }

    # _aws_session.client(...) is used as an async context manager.
    session_cm = AsyncMock()
    session_cm.__aenter__ = AsyncMock(return_value=bedrock_client)
    session_cm.__aexit__ = AsyncMock(return_value=None)

    with patch.object(llm._aws_session, "client", return_value=session_cm):
        reply = await llm.run_inference(context)

        assert reply == "Response"
        # Inspect what was actually sent to converse().
        sent = bedrock_client.converse.call_args.kwargs
        assert sent["system"] == [{"text": "Original system"}]
|
|
|
|
|
|
# --- OpenAI Responses API tests ---
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_openai_responses_run_inference_with_llm_context():
    """Verify Responses run_inference returns output_text and sends the configured request."""
    with patch.object(OpenAIResponsesLLMService, "_create_client"):
        settings = OpenAIResponsesLLMService.Settings(
            model="gpt-4.1",
            system_instruction="You are a helpful assistant",
            temperature=0.7,
            max_completion_tokens=100,
        )
        llm = OpenAIResponsesLLMService(settings=settings)
        llm._client = AsyncMock()

        # A real LLMContext with a single user turn.
        context = LLMContext(messages=[{"role": "user", "content": "Hello, world!"}])

        api_response = MagicMock()
        api_response.output_text = "Hello! How can I help you today?"
        llm._client.responses.create = AsyncMock(return_value=api_response)

        reply = await llm.run_inference(context)

        assert reply == "Hello! How can I help you today?"
        # Inspect the request sent to responses.create.
        sent = llm._client.responses.create.call_args.kwargs
        assert sent["model"] == "gpt-4.1"
        assert sent["stream"] is False
        assert sent["store"] is False
        assert sent["input"] == [{"role": "user", "content": "Hello, world!"}]
        assert sent["instructions"] == "You are a helpful assistant"
        assert sent["temperature"] == 0.7
        assert sent["max_output_tokens"] == 100
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_openai_responses_run_inference_client_exception():
    """Verify an error raised by the Responses client surfaces from run_inference."""
    with patch.object(OpenAIResponsesLLMService, "_create_client"):
        llm = OpenAIResponsesLLMService()
        llm._client = AsyncMock()
        # Make responses.create fail when awaited.
        llm._client.responses.create = AsyncMock(side_effect=Exception("API Error"))

        context = LLMContext(messages=[{"role": "user", "content": "Hello"}])

        with pytest.raises(Exception, match="API Error"):
            await llm.run_inference(context)
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_openai_responses_run_inference_system_instruction_overrides():
    """Verify the system_instruction argument replaces the one from settings."""
    with patch.object(OpenAIResponsesLLMService, "_create_client"):
        settings = OpenAIResponsesLLMService.Settings(
            model="gpt-4.1",
            system_instruction="Original instruction",
        )
        llm = OpenAIResponsesLLMService(settings=settings)
        llm._client = AsyncMock()

        context = LLMContext(messages=[{"role": "user", "content": "Hello"}])

        api_response = MagicMock()
        api_response.output_text = "Response"
        llm._client.responses.create = AsyncMock(return_value=api_response)

        reply = await llm.run_inference(context, system_instruction="New system instruction")

        assert reply == "Response"
        # The request must carry the override, with the user turn untouched.
        sent = llm._client.responses.create.call_args.kwargs
        assert sent["instructions"] == "New system instruction"
        assert sent["input"] == [{"role": "user", "content": "Hello"}]
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_openai_responses_run_inference_empty_context_with_instruction():
    """Verify the settings instruction is sent as a developer message for an empty context."""
    with patch.object(OpenAIResponsesLLMService, "_create_client"):
        settings = OpenAIResponsesLLMService.Settings(
            model="gpt-4.1",
            system_instruction="You are helpful",
        )
        llm = OpenAIResponsesLLMService(settings=settings)
        llm._client = AsyncMock()

        # No conversation turns at all.
        context = LLMContext(messages=[])

        api_response = MagicMock()
        api_response.output_text = "Response"
        llm._client.responses.create = AsyncMock(return_value=api_response)

        reply = await llm.run_inference(context)

        assert reply == "Response"
        sent = llm._client.responses.create.call_args.kwargs
        # The instruction travels as the only input item, not as `instructions`.
        assert sent["input"] == [{"role": "developer", "content": "You are helpful"}]
        assert "instructions" not in sent
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_openai_responses_run_inference_max_tokens_override():
    """Verify the max_tokens argument takes precedence as max_output_tokens."""
    with patch.object(OpenAIResponsesLLMService, "_create_client"):
        settings = OpenAIResponsesLLMService.Settings(
            model="gpt-4.1",
            max_completion_tokens=500,
        )
        llm = OpenAIResponsesLLMService(settings=settings)
        llm._client = AsyncMock()

        context = LLMContext(messages=[{"role": "user", "content": "Summarize this"}])

        api_response = MagicMock()
        api_response.output_text = "Summary"
        llm._client.responses.create = AsyncMock(return_value=api_response)

        reply = await llm.run_inference(context, max_tokens=200)

        assert reply == "Summary"
        # The per-call limit (200) must win over the configured 500.
        sent = llm._client.responses.create.call_args.kwargs
        assert sent["max_output_tokens"] == 200
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_openai_responses_run_inference_system_instruction_param_with_empty_context():
    """Verify an explicit system_instruction is sent as a developer message for an empty context.

    The Responses API rejects requests that have instructions but no input
    items. So when run_inference receives a system_instruction together with
    an empty context, the instruction has to be delivered as a developer
    message instead of through the instructions parameter.
    """
    with patch.object(OpenAIResponsesLLMService, "_create_client"):
        settings = OpenAIResponsesLLMService.Settings(model="gpt-4.1")
        llm = OpenAIResponsesLLMService(settings=settings)
        llm._client = AsyncMock()

        # No conversation turns at all.
        context = LLMContext(messages=[])

        api_response = MagicMock()
        api_response.output_text = "Response"
        llm._client.responses.create = AsyncMock(return_value=api_response)

        reply = await llm.run_inference(context, system_instruction="Summarize the conversation")

        assert reply == "Response"
        sent = llm._client.responses.create.call_args.kwargs
        expected_input = [{"role": "developer", "content": "Summarize the conversation"}]
        assert sent["input"] == expected_input
        assert "instructions" not in sent
|
|
|
|
|
|
# --- OpenAI Responses HTTP API tests ---
|
|
# These mirror the WebSocket variant tests above, verifying that the HTTP
|
|
# variant's run_inference (inherited from the shared base class) works
|
|
# identically.
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_openai_responses_http_run_inference_with_llm_context():
    """Verify HTTP Responses run_inference returns output_text and sends the configured request."""
    with patch.object(OpenAIResponsesHttpLLMService, "_create_client"):
        settings = OpenAIResponsesHttpLLMService.Settings(
            model="gpt-4.1",
            system_instruction="You are a helpful assistant",
            temperature=0.7,
            max_completion_tokens=100,
        )
        llm = OpenAIResponsesHttpLLMService(settings=settings)
        llm._client = AsyncMock()

        # A real LLMContext with a single user turn.
        context = LLMContext(messages=[{"role": "user", "content": "Hello, world!"}])

        api_response = MagicMock()
        api_response.output_text = "Hello! How can I help you today?"
        llm._client.responses.create = AsyncMock(return_value=api_response)

        reply = await llm.run_inference(context)

        assert reply == "Hello! How can I help you today?"
        # Inspect the request sent to responses.create.
        sent = llm._client.responses.create.call_args.kwargs
        assert sent["model"] == "gpt-4.1"
        assert sent["stream"] is False
        assert sent["store"] is False
        assert sent["input"] == [{"role": "user", "content": "Hello, world!"}]
        assert sent["instructions"] == "You are a helpful assistant"
        assert sent["temperature"] == 0.7
        assert sent["max_output_tokens"] == 100
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_openai_responses_http_run_inference_client_exception():
    """Verify an error raised by the HTTP Responses client surfaces from run_inference."""
    with patch.object(OpenAIResponsesHttpLLMService, "_create_client"):
        llm = OpenAIResponsesHttpLLMService()
        llm._client = AsyncMock()
        # Make responses.create fail when awaited.
        llm._client.responses.create = AsyncMock(side_effect=Exception("API Error"))

        context = LLMContext(messages=[{"role": "user", "content": "Hello"}])

        with pytest.raises(Exception, match="API Error"):
            await llm.run_inference(context)
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_openai_responses_http_run_inference_system_instruction_overrides():
    """HTTP variant: a system_instruction argument replaces the one from settings."""
    with patch.object(OpenAIResponsesHttpLLMService, "_create_client"):
        settings = OpenAIResponsesHttpLLMService.Settings(
            model="gpt-4.1",
            system_instruction="Original instruction",
        )
        service = OpenAIResponsesHttpLLMService(settings=settings)

        # Mocked client returning a fixed reply.
        service._client = AsyncMock()
        service._client.responses.create = AsyncMock(
            return_value=MagicMock(output_text="Response")
        )

        ctx = LLMContext(messages=[{"role": "user", "content": "Hello"}])
        reply = await service.run_inference(ctx, system_instruction="New system instruction")

        assert reply == "Response"

        # The per-call instruction is sent; the context messages pass through as input.
        sent = service._client.responses.create.call_args.kwargs
        assert sent["instructions"] == "New system instruction"
        assert sent["input"] == [{"role": "user", "content": "Hello"}]
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_openai_responses_http_run_inference_empty_context_with_instruction():
    """HTTP variant: with an empty context, the settings instruction is sent as a developer message."""
    with patch.object(OpenAIResponsesHttpLLMService, "_create_client"):
        settings = OpenAIResponsesHttpLLMService.Settings(
            model="gpt-4.1",
            system_instruction="You are helpful",
        )
        service = OpenAIResponsesHttpLLMService(settings=settings)

        # Mocked client returning a fixed reply.
        service._client = AsyncMock()
        service._client.responses.create = AsyncMock(
            return_value=MagicMock(output_text="Response")
        )

        empty_ctx = LLMContext(messages=[])
        reply = await service.run_inference(empty_ctx)

        assert reply == "Response"

        # The instruction appears in "input"; no "instructions" kwarg is sent.
        sent = service._client.responses.create.call_args.kwargs
        assert sent["input"] == [{"role": "developer", "content": "You are helpful"}]
        assert "instructions" not in sent
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_openai_responses_http_run_inference_max_tokens_override():
    """HTTP variant: a max_tokens argument replaces the configured max_output_tokens."""
    with patch.object(OpenAIResponsesHttpLLMService, "_create_client"):
        settings = OpenAIResponsesHttpLLMService.Settings(
            model="gpt-4.1",
            max_completion_tokens=500,
        )
        service = OpenAIResponsesHttpLLMService(settings=settings)

        # Mocked client returning a fixed reply.
        service._client = AsyncMock()
        service._client.responses.create = AsyncMock(
            return_value=MagicMock(output_text="Summary")
        )

        ctx = LLMContext(messages=[{"role": "user", "content": "Summarize this"}])
        reply = await service.run_inference(ctx, max_tokens=200)

        assert reply == "Summary"

        # The per-call limit (200) is sent rather than the settings value (500).
        sent = service._client.responses.create.call_args.kwargs
        assert sent["max_output_tokens"] == 200
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_openai_responses_http_run_inference_system_instruction_param_with_empty_context():
    """HTTP variant: with an empty context, a system_instruction argument is sent as a developer message."""
    with patch.object(OpenAIResponsesHttpLLMService, "_create_client"):
        settings = OpenAIResponsesHttpLLMService.Settings(model="gpt-4.1")
        service = OpenAIResponsesHttpLLMService(settings=settings)

        # Mocked client returning a fixed reply.
        service._client = AsyncMock()
        service._client.responses.create = AsyncMock(
            return_value=MagicMock(output_text="Response")
        )

        empty_ctx = LLMContext(messages=[])
        reply = await service.run_inference(
            empty_ctx,
            system_instruction="Summarize the conversation",
        )

        assert reply == "Response"

        # The instruction appears in "input"; no "instructions" kwarg is sent.
        sent = service._client.responses.create.call_args.kwargs
        expected_input = [{"role": "developer", "content": "Summarize the conversation"}]
        assert sent["input"] == expected_input
        assert "instructions" not in sent
|