From e5aa3bbc20ad907bcd38852aa6aa06bb07733f5e Mon Sep 17 00:00:00 2001
From: Luke Halley <development@lukehalley.com>
Date: Wed, 1 Apr 2026 10:47:30 +0800
Subject: [PATCH] feat(aws): add prompt caching support for Bedrock
 ConverseStream

Adds an `enable_prompt_caching` setting to `AWSBedrockLLMSettings`. When
enabled, a `cachePoint` marker is appended to the system prompt list and
to the tool definitions in ConverseStream requests, unless one is already
present.

This can reduce time to first token (TTFT) by up to 85% for multi-turn
conversations where the system prompt stays constant (e.g. voice agents,
chat assistants).

Follows the same pattern as `AnthropicLLMService.enable_prompt_caching`.

Usage:
```python
llm = AWSBedrockLLMService(
    settings=AWSBedrockLLMSettings(
        model="au.anthropic.claude-haiku-4-5-20251001-v1:0",
        enable_prompt_caching=True,
    ),
)
```

See: https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html
---
 src/pipecat/services/aws/llm.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/src/pipecat/services/aws/llm.py b/src/pipecat/services/aws/llm.py
index 722072592..83b970874 100644
--- a/src/pipecat/services/aws/llm.py
+++ b/src/pipecat/services/aws/llm.py
@@ -66,11 +66,16 @@ class AWSBedrockLLMSettings(LLMSettings):
     Parameters:
         stop_sequences: List of strings that stop generation.
         latency: Performance mode - "standard" or "optimized".
+        enable_prompt_caching: Whether to enable prompt caching by adding cachePoint
+            markers to system prompts and tool definitions. Can reduce time to first
+            token (TTFT) by up to 85% for multi-turn conversations. See:
+            https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html
         additional_model_request_fields: Additional model-specific parameters.
     """
 
     stop_sequences: List[str] | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
     latency: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
+    enable_prompt_caching: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
     additional_model_request_fields: Dict[str, Any] | _NotGiven = field(
         default_factory=lambda: NOT_GIVEN
     )
@@ -455,6 +460,24 @@ class AWSBedrockLLMService(LLMService):
             if self._settings.latency in ["standard", "optimized"]:
                 request_params["performanceConfig"] = {"latency": self._settings.latency}
 
+            # Add cache checkpoints to system prompts and tool definitions.
+            # This enables prompt caching for providers that support it (e.g.
+            # Anthropic Claude on Bedrock), reducing TTFT by up to 85% on
+            # multi-turn conversations where the system prompt stays constant.
+            if self._settings.enable_prompt_caching:
+                if "system" in request_params and request_params["system"]:
+                    system_list = request_params["system"]
+                    if not any("cachePoint" in item for item in system_list):
+                        system_list.append({"cachePoint": {"type": "default"}})
+                if (
+                    "toolConfig" in request_params
+                    and "tools" in request_params["toolConfig"]
+                    and request_params["toolConfig"]["tools"]
+                ):
+                    tools_list = request_params["toolConfig"]["tools"]
+                    if not any("cachePoint" in t for t in tools_list):
+                        tools_list.append({"cachePoint": {"type": "default"}})
+
             # Log request params with messages redacted for logging
             adapter = self.get_llm_adapter()
             messages_for_logging = adapter.get_messages_for_logging(context)