diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index 3b162d48f..574af7bbf 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -52,6 +52,7 @@ from pipecat.frames.frames import ( StartFrame, TextFrame, TranscriptionFrame, + TranslationFrame, UserImageRawFrame, UserMuteStartedFrame, UserMuteStoppedFrame, @@ -795,7 +796,6 @@ class LLMAssistantAggregator(LLMContextAggregator): DeprecationWarning, ) - self._started = 0 self._function_calls_in_progress: Dict[str, Optional[FunctionCallInProgressFrame]] = {} self._function_calls_image_results: Dict[str, UserImageRawFrame] = {} self._context_updated_tasks: Set[asyncio.Task] = set() @@ -917,12 +917,10 @@ class LLMAssistantAggregator(LLMContextAggregator): async def _handle_interruptions(self, frame: InterruptionFrame): await self._trigger_assistant_turn_stopped() - self._started = 0 await self.reset() async def _handle_end_or_cancel(self, frame: Frame): await self._trigger_assistant_turn_stopped() - self._started = 0 async def _handle_function_calls_started(self, frame: FunctionCallsStartedFrame): function_names = [f"{f.function_name}:{f.tool_call_id}" for f in frame.function_calls] @@ -1067,15 +1065,17 @@ class LLMAssistantAggregator(LLMContextAggregator): ) async def _handle_llm_start(self, _: LLMFullResponseStartFrame): - self._started += 1 await self._trigger_assistant_turn_started() async def _handle_llm_end(self, _: LLMFullResponseEndFrame): - self._started -= 1 await self._trigger_assistant_turn_stopped() async def _handle_text(self, frame: TextFrame): - if not self._started or not frame.append_to_context: + # Skip TextFrame types not intended to build the assistant context + if isinstance(frame, (TranscriptionFrame, TranslationFrame, InterimTranscriptionFrame)): + return + + if not frame.append_to_context: return # Make sure we really have text (spaces count, too!) @@ -1089,18 +1089,12 @@ class LLMAssistantAggregator(LLMContextAggregator): ) async def _handle_thought_start(self, frame: LLMThoughtStartFrame): - if not self._started: - return - await self._reset_thought_aggregation() self._thought_append_to_context = frame.append_to_context self._thought_llm = frame.llm self._thought_start_time = time_now_iso8601() async def _handle_thought_text(self, frame: LLMThoughtTextFrame): - if not self._started: - return - # Make sure we really have text (spaces count, too!) if len(frame.text) == 0: return @@ -1112,9 +1106,6 @@ class LLMAssistantAggregator(LLMContextAggregator): ) async def _handle_thought_end(self, frame: LLMThoughtEndFrame): - if not self._started: - return - thought = concatenate_aggregated_text(self._thought_aggregation) if self._thought_append_to_context: