Fix AWS Nova Sonic reset_conversation(), which previously failed with an error.

Issues:
- After disconnecting, we prematurely sent audio messages using the new prompt and content names, before the new prompt and content had been created.
- We weren't sending system instruction and conversation history messages to Nova Sonic with `"interactive": false`; every text event was sent with `"interactive": true`.
This commit is contained in:
Paul Kompfner
2026-01-16 22:01:39 -05:00
parent b11150f31f
commit 6fa797c8e4

View File

@@ -296,6 +296,7 @@ class AWSNovaSonicLLMService(LLMService):
self._user_text_buffer = ""
self._assistant_text_buffer = ""
self._completed_tool_calls = set()
self._audio_input_started = False
file_path = files("pipecat.services.aws.nova_sonic").joinpath("ready.wav")
with wave.open(file_path.open("rb"), "rb") as wav_file:
@@ -533,9 +534,16 @@ class AWSNovaSonicLLMService(LLMService):
await self._send_text_event(text=system_instruction, role=Role.SYSTEM)
# Send conversation history
for message in llm_connection_params["messages"]:
messages = llm_connection_params["messages"]
for i, message in enumerate(messages):
# logger.debug(f"Seeding conversation history with message: {message}")
await self._send_text_event(text=message.text, role=message.role)
# If last message is from user, mark it as interactive to trigger
# bot response
is_last_message = i == len(messages) - 1
interactive = is_last_message and message.role == Role.USER
await self._send_text_event(
text=message.text, role=message.role, interactive=interactive
)
# Start audio input
await self._send_audio_input_start_event()
@@ -602,6 +610,7 @@ class AWSNovaSonicLLMService(LLMService):
self._user_text_buffer = ""
self._assistant_text_buffer = ""
self._completed_tool_calls = set()
self._audio_input_started = False
logger.info("Finished disconnecting")
except Exception as e:
@@ -727,8 +736,18 @@ class AWSNovaSonicLLMService(LLMService):
}}
'''
await self._send_client_event(audio_content_start)
self._audio_input_started = True
async def _send_text_event(self, text: str, role: Role):
async def _send_text_event(self, text: str, role: Role, interactive: bool = False):
"""Send a text event to the LLM.
Args:
text: The text content to send.
role: The role associated with the text (e.g., USER, ASSISTANT, SYSTEM).
interactive: Whether the content is interactive. Defaults to False.
False: conversation history or system instruction, sent prior to interactive audio
True: text input sent during (or at the start of) interactive audio
"""
if not self._stream or not self._prompt_name or not text:
return
@@ -741,7 +760,7 @@ class AWSNovaSonicLLMService(LLMService):
"promptName": "{self._prompt_name}",
"contentName": "{content_name}",
"type": "TEXT",
"interactive": true,
"interactive": {json.dumps(interactive)},
"role": "{role.value}",
"textInputConfiguration": {{
"mediaType": "text/plain"
@@ -779,7 +798,7 @@ class AWSNovaSonicLLMService(LLMService):
await self._send_client_event(text_content_end)
async def _send_user_audio_event(self, audio: bytes):
if not self._stream:
if not self._stream or not self._audio_input_started:
return
blob = base64.b64encode(audio)