From b22ac8292f9ed8502b73e68bc38d12b3d60b2969 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 9 Dec 2025 09:38:47 -0500 Subject: [PATCH 01/10] Update default model in `AWSNovaSonicLLMService` to "amazon.nova-2-sonic-v1:0" --- src/pipecat/services/aws/nova_sonic/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pipecat/services/aws/nova_sonic/llm.py b/src/pipecat/services/aws/nova_sonic/llm.py index d08d2603f..f581c351d 100644 --- a/src/pipecat/services/aws/nova_sonic/llm.py +++ b/src/pipecat/services/aws/nova_sonic/llm.py @@ -192,7 +192,7 @@ class AWSNovaSonicLLMService(LLMService): access_key_id: str, session_token: Optional[str] = None, region: str, - model: str = "amazon.nova-sonic-v1:0", + model: str = "amazon.nova-2-sonic-v1:0", voice_id: str = "matthew", # matthew, tiffany, amy params: Optional[Params] = None, system_instruction: Optional[str] = None, From 53de6c0b9a5493c0e5606233d9ddf3da17801aab Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 9 Dec 2025 09:46:53 -0500 Subject: [PATCH 02/10] Update list of supported regions in 40-aws-nova-sonic.py --- examples/foundational/40-aws-nova-sonic.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/examples/foundational/40-aws-nova-sonic.py b/examples/foundational/40-aws-nova-sonic.py index e5e36e404..f53dd06f2 100644 --- a/examples/foundational/40-aws-nova-sonic.py +++ b/examples/foundational/40-aws-nova-sonic.py @@ -105,7 +105,15 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): llm = AWSNovaSonicLLMService( secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"), access_key_id=os.getenv("AWS_ACCESS_KEY_ID"), - region=os.getenv("AWS_REGION"), # as of 2025-05-06, us-east-1 is the only supported region + # as of 2025-12-09, these are the supported regions: + # - Nova 2 Sonic (the default model): + # - us-east-1 + # - us-west-2 + # - ap-northeast-1 + # - Nova Sonic (the older model): + # - us-east-1 + # - ap-northeast-1 + 
region=os.getenv("AWS_REGION"), session_token=os.getenv("AWS_SESSION_TOKEN"), voice_id="tiffany", # matthew, tiffany, amy # you could choose to pass instruction here rather than via context From ca5e668f4a730874bafcdba1d0a209904f28de8d Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 9 Dec 2025 10:14:27 -0500 Subject: [PATCH 03/10] Update `AWSNovaSonicLLMService` docstring with more (and more up-to-date) info --- examples/foundational/40-aws-nova-sonic.py | 2 +- src/pipecat/services/aws/nova_sonic/llm.py | 13 ++++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/examples/foundational/40-aws-nova-sonic.py b/examples/foundational/40-aws-nova-sonic.py index f53dd06f2..ed329e281 100644 --- a/examples/foundational/40-aws-nova-sonic.py +++ b/examples/foundational/40-aws-nova-sonic.py @@ -115,7 +115,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): # - ap-northeast-1 region=os.getenv("AWS_REGION"), session_token=os.getenv("AWS_SESSION_TOKEN"), - voice_id="tiffany", # matthew, tiffany, amy + voice_id="tiffany", # you could choose to pass instruction here rather than via context # system_instruction=system_instruction # you could choose to pass tools here rather than via context diff --git a/src/pipecat/services/aws/nova_sonic/llm.py b/src/pipecat/services/aws/nova_sonic/llm.py index f581c351d..95477321b 100644 --- a/src/pipecat/services/aws/nova_sonic/llm.py +++ b/src/pipecat/services/aws/nova_sonic/llm.py @@ -193,7 +193,7 @@ class AWSNovaSonicLLMService(LLMService): session_token: Optional[str] = None, region: str, model: str = "amazon.nova-2-sonic-v1:0", - voice_id: str = "matthew", # matthew, tiffany, amy + voice_id: str = "matthew", params: Optional[Params] = None, system_instruction: Optional[str] = None, tools: Optional[ToolsSchema] = None, @@ -207,8 +207,15 @@ class AWSNovaSonicLLMService(LLMService): access_key_id: AWS access key ID for authentication. session_token: AWS session token for authentication. 
region: AWS region where the service is hosted. - model: Model identifier. Defaults to "amazon.nova-sonic-v1:0". - voice_id: Voice ID for speech synthesis. Options: matthew, tiffany, amy. + Supported regions: + - Nova 2 Sonic (the default model): "us-east-1", "us-west-2", "ap-northeast-1" + - Nova Sonic (the older model): "us-east-1", "ap-northeast-1" + model: Model identifier. Defaults to "amazon.nova-2-sonic-v1:0". + voice_id: Voice ID for speech synthesis. + Note that some voices are designed for use with a specific language. + Options: + - Nova 2 Sonic (the default model): see https://docs.aws.amazon.com/nova/latest/nova2-userguide/sonic-language-support.html + - Nova Sonic (the older model): see https://docs.aws.amazon.com/nova/latest/userguide/available-voices.html. params: Model parameters for audio configuration and inference. system_instruction: System-level instruction for the model. tools: Available tools/functions for the model to use. From 926514ca1892d3ed57d6d24ae810e71a81ecda43 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 9 Dec 2025 11:26:43 -0500 Subject: [PATCH 04/10] Add support to `AWSNovaSonicLLMService` for new "endpointingSensitivity" parameter. --- src/pipecat/services/aws/nova_sonic/llm.py | 36 +++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/src/pipecat/services/aws/nova_sonic/llm.py b/src/pipecat/services/aws/nova_sonic/llm.py index 95477321b..641f126e5 100644 --- a/src/pipecat/services/aws/nova_sonic/llm.py +++ b/src/pipecat/services/aws/nova_sonic/llm.py @@ -157,6 +157,12 @@ class Params(BaseModel): max_tokens: Maximum number of tokens to generate. top_p: Nucleus sampling parameter. temperature: Sampling temperature for text generation. + endpointing_sensitivity: Controls how quickly Nova Sonic decides the + user has stopped speaking. Can be "LOW", "MEDIUM", or "HIGH", with + "HIGH" being the most sensitive (i.e., causing the model to respond + most quickly). 
+ If not set, uses the model's default behavior. + Only supported with Nova 2 Sonic (the default model). """ # Audio input @@ -174,6 +180,9 @@ class Params(BaseModel): top_p: Optional[float] = Field(default=0.9) temperature: Optional[float] = Field(default=0.7) + # Turn-taking + endpointing_sensitivity: Optional[str] = Field(default=None) + class AWSNovaSonicLLMService(LLMService): """AWS Nova Sonic speech-to-speech LLM service. @@ -239,6 +248,17 @@ class AWSNovaSonicLLMService(LLMService): self._system_instruction = system_instruction self._tools = tools + # Validate endpointing_sensitivity parameter + if ( + self._params.endpointing_sensitivity + and not self._is_endpointing_sensitivity_supported() + ): + logger.warning( + f"endpointing_sensitivity is not supported for model '{model}' and will be ignored. " + "This parameter is only supported starting with Nova 2 Sonic (amazon.nova-2-sonic-v1:0)." + ) + self._params.endpointing_sensitivity = None + if not send_transcription_frames: import warnings @@ -598,11 +618,25 @@ class AWSNovaSonicLLMService(LLMService): ) return BedrockRuntimeClient(config=config) + def _is_endpointing_sensitivity_supported(self) -> bool: + # endpointing_sensitivity is only supported with Nova 2 Sonic (and, + # presumably, future models) + return self._model != "amazon.nova-sonic-v1:0" + # # LLM communication: input events (pipecat -> LLM) # async def _send_session_start_event(self): + turn_detection_config = ( + f""", + "turnDetectionConfiguration": {{ + "endpointingSensitivity": "{self._params.endpointing_sensitivity}" + }}""" + if self._params.endpointing_sensitivity + else "" + ) + session_start = f""" {{ "event": {{ @@ -611,7 +645,7 @@ class AWSNovaSonicLLMService(LLMService): "maxTokens": {self._params.max_tokens}, "topP": {self._params.top_p}, "temperature": {self._params.temperature} - }} + }}{turn_detection_config} }} }} }} From 0c5bccd1f106b51a93397de05347a58e7afbca28 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 9 Dec 
2025 11:55:23 -0500 Subject: [PATCH 05/10] Changes related to Nova 2 Sonic's support for the model speaking first --- examples/foundational/40-aws-nova-sonic.py | 18 +++++++-------- src/pipecat/services/aws/nova_sonic/llm.py | 26 +++++++++++++++++----- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/examples/foundational/40-aws-nova-sonic.py b/examples/foundational/40-aws-nova-sonic.py index ed329e281..0033fe6bb 100644 --- a/examples/foundational/40-aws-nova-sonic.py +++ b/examples/foundational/40-aws-nova-sonic.py @@ -91,14 +91,14 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info(f"Starting bot") # Specify initial system instruction. - # HACK: note that, for now, we need to inject a special bit of text into this instruction to - # allow the first assistant response to be programmatically triggered (which happens in the - # on_client_connected handler, below) system_instruction = ( "You are a friendly assistant. The user and you will engage in a spoken dialog exchanging " "the transcripts of a natural real-time conversation. Keep your responses short, generally " - "two or three sentences for chatty scenarios. " - f"{AWSNovaSonicLLMService.AWAIT_TRIGGER_ASSISTANT_RESPONSE_INSTRUCTION}" + "two or three sentences for chatty scenarios." + # HACK: if using the older Nova Sonic (pre-2) model, note that you need to inject a special + # bit of text into this instruction to allow the first assistant response to be + # programmatically triggered (which happens in the on_client_connected handler) + # f"{AWSNovaSonicLLMService.AWAIT_TRIGGER_ASSISTANT_RESPONSE_INSTRUCTION}" ) # Create the AWS Nova Sonic LLM service @@ -167,10 +167,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info(f"Client connected") # Kick off the conversation. await task.queue_frames([LLMRunFrame()]) - # HACK: for now, we need this special way of triggering the first assistant response in AWS - # Nova Sonic. 
Note that this trigger requires a special corresponding bit of text in the - # system instruction. In the future, simply queueing the context frame should be sufficient. - await llm.trigger_assistant_response() + # HACK: if using the older Nova Sonic (pre-2) model, you need this special way of + # triggering the first assistant response. Note that this trigger requires a special + # corresponding bit of text in the system instruction. + # await llm.trigger_assistant_response() # Handle client disconnection events @transport.event_handler("on_client_disconnected") diff --git a/src/pipecat/services/aws/nova_sonic/llm.py b/src/pipecat/services/aws/nova_sonic/llm.py index 641f126e5..25f32e819 100644 --- a/src/pipecat/services/aws/nova_sonic/llm.py +++ b/src/pipecat/services/aws/nova_sonic/llm.py @@ -618,10 +618,18 @@ class AWSNovaSonicLLMService(LLMService): ) return BedrockRuntimeClient(config=config) + def _is_first_generation_sonic_model(self) -> bool: + # Nova Sonic (the older model) is identified by "amazon.nova-sonic-v1:0" + return self._model == "amazon.nova-sonic-v1:0" + def _is_endpointing_sensitivity_supported(self) -> bool: # endpointing_sensitivity is only supported with Nova 2 Sonic (and, # presumably, future models) - return self._model != "amazon.nova-sonic-v1:0" + return not self._is_first_generation_sonic_model() + + def _is_assistant_response_trigger_needed(self) -> bool: + # Assistant response trigger audio is only needed with the older model + return self._is_first_generation_sonic_model() # # LLM communication: input events (pipecat -> LLM) @@ -1230,7 +1238,8 @@ class AWSNovaSonicLLMService(LLMService): ) # - # assistant response trigger (HACK) + # assistant response trigger + # HACK: only needed for the older Nova Sonic (as opposed to Nova 2 Sonic) model # # Class variable @@ -1244,12 +1253,17 @@ class AWSNovaSonicLLMService(LLMService): Sends a pre-recorded "ready" audio trigger to prompt the assistant to start speaking. 
This is useful for controlling conversation flow. - - Returns: - False if already triggering a response, True otherwise. """ + if not self._is_assistant_response_trigger_needed(): + logger.warning( + f"Assistant response trigger not needed for model '{self._model}'; skipping. " + "An LLMRunFrame() should be sufficient to prompt the assistant to respond, " + "assuming the context ends in a user message." + ) + return + if self._triggering_assistant_response: - return False + return self._triggering_assistant_response = True From b821dd2507a6b67df0f4017367d595ed92a59de8 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 9 Dec 2025 12:12:55 -0500 Subject: [PATCH 06/10] Fix a bug in `AWSNovaSonicLLMService` where we would mishandle cancelled tool calls in context --- src/pipecat/services/aws/nova_sonic/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pipecat/services/aws/nova_sonic/llm.py b/src/pipecat/services/aws/nova_sonic/llm.py index 25f32e819..a677a8772 100644 --- a/src/pipecat/services/aws/nova_sonic/llm.py +++ b/src/pipecat/services/aws/nova_sonic/llm.py @@ -486,7 +486,7 @@ class AWSNovaSonicLLMService(LLMService): async def _process_completed_function_calls(self, send_new_results: bool): # Check for set of completed function calls in the context for message in self._context.get_messages(): - if message.get("role") and message.get("content") != "IN_PROGRESS": + if message.get("role") and message.get("content") not in ["IN_PROGRESS", "CANCELLED"]: tool_call_id = message.get("tool_call_id") if tool_call_id and tool_call_id not in self._completed_tool_calls: # Found a newly-completed function call - send the result to the service From 3e66cb50e0a9c27685338114d290e750dca5feb1 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 9 Dec 2025 12:44:21 -0500 Subject: [PATCH 07/10] Update AWS Nova Sonic example to showcase async tool calling --- examples/foundational/40-aws-nova-sonic.py | 17 +++++++++++++++-- 1 file changed, 15 
insertions(+), 2 deletions(-) diff --git a/examples/foundational/40-aws-nova-sonic.py b/examples/foundational/40-aws-nova-sonic.py index 0033fe6bb..c45858153 100644 --- a/examples/foundational/40-aws-nova-sonic.py +++ b/examples/foundational/40-aws-nova-sonic.py @@ -5,7 +5,9 @@ # +import asyncio import os +import random from datetime import datetime from dotenv import load_dotenv @@ -33,7 +35,16 @@ load_dotenv(override=True) async def fetch_weather_from_api(params: FunctionCallParams): - temperature = 75 if params.arguments["format"] == "fahrenheit" else 24 + temperature = ( + random.randint(60, 85) + if params.arguments["format"] == "fahrenheit" + else random.randint(15, 30) + ) + # Simulate a long network delay. + # You can continue chatting while waiting for this to complete. + # With Nova 2 Sonic (the default model), the assistant will respond + # appropriately once the function call is complete. + await asyncio.sleep(10) await params.result_callback( { "conditions": "nice", @@ -125,7 +136,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): # Register function for function calls # you can either register a single function for all function calls, or specific functions # llm.register_function(None, fetch_weather_from_api) - llm.register_function("get_current_weather", fetch_weather_from_api) + llm.register_function( + "get_current_weather", fetch_weather_from_api, cancel_on_interruption=False + ) # Set up context and context management. 
context = LLMContext( From 3cbfbb997e321db58d533c18652eba2a0f1e2ce5 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 9 Dec 2025 12:57:19 -0500 Subject: [PATCH 08/10] Added CHANGELOG for AWS Nova 2 Sonic-related changes --- changelog/3212.added.md | 6 ++++++ changelog/3212.changed.md | 1 + changelog/3212.fixed.md | 2 ++ 3 files changed, 9 insertions(+) create mode 100644 changelog/3212.added.md create mode 100644 changelog/3212.changed.md create mode 100644 changelog/3212.fixed.md diff --git a/changelog/3212.added.md b/changelog/3212.added.md new file mode 100644 index 000000000..042f926e3 --- /dev/null +++ b/changelog/3212.added.md @@ -0,0 +1,6 @@ +- Added to `AWSNovaSonicLLMService` functionality related to the new (and now + default) Nova 2 Sonic model (`"amazon.nova-2-sonic-v1:0"`): + - Added the `endpointing_sensitivity` parameter to control how quickly the + model decides the user has stopped speaking. + - Made the assistant-response-trigger hack a no-op. It's only needed for the + older Nova Sonic model. diff --git a/changelog/3212.changed.md b/changelog/3212.changed.md new file mode 100644 index 000000000..b63fc16ce --- /dev/null +++ b/changelog/3212.changed.md @@ -0,0 +1 @@ +- Made `"amazon.nova-2-sonic-v1:0"` the new default model for `AWSNovaSonicLLMService`. diff --git a/changelog/3212.fixed.md b/changelog/3212.fixed.md new file mode 100644 index 000000000..73b4acac7 --- /dev/null +++ b/changelog/3212.fixed.md @@ -0,0 +1,2 @@ +- Fixed a bug in `AWSNovaSonicLLMService` where we would mishandle cancelled + tool calls in the context, resulting in errors. 
From 1892854516e26e070b16a21ea2c658ddea7d5429 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 9 Dec 2025 16:27:23 -0500 Subject: [PATCH 09/10] In the AWS Nova Sonic example, send back "location" from the weather-fetching function to help the model associate a tool response with a tool call...if you interrupt the model while more than one function call is outbound, it seemingly can get confused about which tool result goes with which call. --- examples/foundational/40-aws-nova-sonic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/foundational/40-aws-nova-sonic.py b/examples/foundational/40-aws-nova-sonic.py index c45858153..c97261e97 100644 --- a/examples/foundational/40-aws-nova-sonic.py +++ b/examples/foundational/40-aws-nova-sonic.py @@ -49,6 +49,7 @@ async def fetch_weather_from_api(params: FunctionCallParams): { "conditions": "nice", "temperature": temperature, + "location": params.arguments["location"], "format": params.arguments["format"], "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"), } From c37da6ab78974ad1bc371d0cdf31ac6e78447485 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 9 Dec 2025 16:53:18 -0500 Subject: [PATCH 10/10] In the AWS Nova Sonic example, shorten the simulated weather function call delay --- examples/foundational/40-aws-nova-sonic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/foundational/40-aws-nova-sonic.py b/examples/foundational/40-aws-nova-sonic.py index c97261e97..4b389b648 100644 --- a/examples/foundational/40-aws-nova-sonic.py +++ b/examples/foundational/40-aws-nova-sonic.py @@ -44,7 +44,7 @@ async def fetch_weather_from_api(params: FunctionCallParams): # You can continue chatting while waiting for this to complete. # With Nova 2 Sonic (the default model), the assistant will respond # appropriately once the function call is complete. - await asyncio.sleep(10) + await asyncio.sleep(5) await params.result_callback( { "conditions": "nice",