Fix AWS Nova Sonic reset_conversation(), which would previously error out.

Issues:
- After disconnecting, we were prematurely sending audio messages using the new prompt and content names, before the new prompt and content had been created.
- We weren't properly sending system instruction and conversation history messages to Nova Sonic with `"interactive": false`.
2026-01-16 22:01:39 -05:00
parent b11150f31f
commit 6fa797c8e4
1 changed files with 24 additions and 5 deletions
--- a/src/pipecat/services/aws/nova_sonic/llm.py
+++ b/src/pipecat/services/aws/nova_sonic/llm.py
@@ -296,6 +296,7 @@ class AWSNovaSonicLLMService(LLMService):
        self._user_text_buffer = ""
        self._assistant_text_buffer = ""
        self._completed_tool_calls = set()
+        self._audio_input_started = False

        file_path = files("pipecat.services.aws.nova_sonic").joinpath("ready.wav")
        with wave.open(file_path.open("rb"), "rb") as wav_file:
@@ -533,9 +534,16 @@ class AWSNovaSonicLLMService(LLMService):
            await self._send_text_event(text=system_instruction, role=Role.SYSTEM)

        # Send conversation history
-        for message in llm_connection_params["messages"]:
+        messages = llm_connection_params["messages"]
+        for i, message in enumerate(messages):
            # logger.debug(f"Seeding conversation history with message: {message}")
-            await self._send_text_event(text=message.text, role=message.role)
+            # If last message is from user, mark it as interactive to trigger
+            # bot response
+            is_last_message = i == len(messages) - 1
+            interactive = is_last_message and message.role == Role.USER
+            await self._send_text_event(
+                text=message.text, role=message.role, interactive=interactive
+            )

        # Start audio input
        await self._send_audio_input_start_event()
@@ -602,6 +610,7 @@ class AWSNovaSonicLLMService(LLMService):
            self._user_text_buffer = ""
            self._assistant_text_buffer = ""
            self._completed_tool_calls = set()
+            self._audio_input_started = False

            logger.info("Finished disconnecting")
        except Exception as e:
@@ -727,8 +736,18 @@ class AWSNovaSonicLLMService(LLMService):
        }}
        '''
        await self._send_client_event(audio_content_start)
+        self._audio_input_started = True

-    async def _send_text_event(self, text: str, role: Role):
+    async def _send_text_event(self, text: str, role: Role, interactive: bool = False):
+        """Send a text event to the LLM.
+
+        Args:
+            text: The text content to send.
+            role: The role associated with the text (e.g., USER, ASSISTANT, SYSTEM).
+            interactive: Whether the content is interactive. Defaults to False.
+                False: conversation history or system instruction, sent prior to interactive audio
+                True: text input sent during (or at the start of) interactive audio
+        """
        if not self._stream or not self._prompt_name or not text:
            return

@@ -741,7 +760,7 @@ class AWSNovaSonicLLMService(LLMService):
                    "promptName": "{self._prompt_name}",
                    "contentName": "{content_name}",
                    "type": "TEXT",
-                    "interactive": true,
+                    "interactive": {json.dumps(interactive)},
                    "role": "{role.value}",
                    "textInputConfiguration": {{
                        "mediaType": "text/plain"
@@ -779,7 +798,7 @@ class AWSNovaSonicLLMService(LLMService):
        await self._send_client_event(text_content_end)

    async def _send_user_audio_event(self, audio: bytes):
-        if not self._stream:
+        if not self._stream or not self._audio_input_started:
            return

        blob = base64.b64encode(audio)