Summary of this patch (text before the first "diff --git" line is skipped by patch tools):

  * conversation_wrappers.py, speak_after_delay: the aggregator factories are
    no longer passed the "user" / "assistant" role strings, and the trailing
    positional False becomes complete_sentences=False.
  * queue_aggregators.py, LLMContextAggregator: the check that returns early
    for TranscriptionQueueFrame instances whose participantId equals
    bot_participant_id now runs before the pass_through yield instead of
    after it.

NOTE(review): line breaks were restored from a copy in which the whole patch
was on a single line. Line counts match the hunk headers, but the indentation
of context lines is inferred from Python structure -- verify it against the
target files (or apply with whitespace tolerance) before relying on it.

diff --git a/src/dailyai/conversation_wrappers.py b/src/dailyai/conversation_wrappers.py
index bb83f1272..7f688477c 100644
--- a/src/dailyai/conversation_wrappers.py
+++ b/src/dailyai/conversation_wrappers.py
@@ -43,10 +43,10 @@ class InterruptibleConversationWrapper:
     async def speak_after_delay(self, user_speech, messages):
         await asyncio.sleep(self._delay_before_speech_seconds)
         tma_in = self._llm_context_aggregator_in(
-            messages, "user", self._my_participant_id, False
+            messages, self._my_participant_id, complete_sentences=False
         )
         tma_out = self._llm_context_aggregator_out(
-            messages, "assistant", self._my_participant_id
+            messages, self._my_participant_id
         )
         await self._runner(user_speech, tma_in, tma_out)
 
diff --git a/src/dailyai/queue_aggregators.py b/src/dailyai/queue_aggregators.py
index 545c86728..55461e29c 100644
--- a/src/dailyai/queue_aggregators.py
+++ b/src/dailyai/queue_aggregators.py
@@ -47,17 +47,17 @@ class LLMContextAggregator(AIService):
             yield frame
             return
 
+        # Ignore transcription frames from the bot
+        if isinstance(frame, TranscriptionQueueFrame):
+            if frame.participantId == self.bot_participant_id:
+                return
+
         # The common case for "pass through" is receiving frames from the LLM that we'll
         # use to update the "assistant" LLM messages, but also passing the text frames
         # along to a TTS service to be spoken to the user.
         if self.pass_through:
             yield frame
 
-        # Ignore transcription frames from the bot
-        if isinstance(frame, TranscriptionQueueFrame):
-            if frame.participantId == self.bot_participant_id:
-                return
-
         # TODO: split up transcription by participant
         if self.complete_sentences:
             self.sentence += frame.text  # type: ignore -- the linter thinks this isn't a TextQueueFrame, even though we check it above