From 3ea9cfd2518cd0abfab3375f2afc741ff242c32a Mon Sep 17 00:00:00 2001
From: Filipi Fuchter <filipi87@gmail.com>
Date: Thu, 17 Apr 2025 16:46:15 -0300
Subject: [PATCH] Keeping the _speech_triggered as true if the state is
 incomplete.

---
 src/pipecat/audio/turn/base_smart_turn.py |  9 +++++----
 src/pipecat/transports/base_input.py      | 23 +++++++++++++----------
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/src/pipecat/audio/turn/base_smart_turn.py b/src/pipecat/audio/turn/base_smart_turn.py
index c08fd8fb3..cc18b2880 100644
--- a/src/pipecat/audio/turn/base_smart_turn.py
+++ b/src/pipecat/audio/turn/base_smart_turn.py
@@ -82,7 +82,7 @@ class BaseSmartTurn(ABC):
                         f"End of Turn complete due to stop_secs. Silence in ms: {self._silence_ms}"
                     )
                     state = EndOfTurnState.COMPLETE
-                    self._clear()
+                    self._clear(state)
             else:
                 # Trim buffer to prevent unbounded growth before speech
                 max_buffer_time = (
@@ -101,14 +101,15 @@ class BaseSmartTurn(ABC):
         logger.debug("Analyzing End of Turn...")
         state = self._process_speech_segment(self._audio_buffer)
         if state == EndOfTurnState.COMPLETE or USE_ONLY_LAST_VAD_SEGMENT:
-            self._clear()
+            self._clear(state)
         logger.debug(f"End of Turn result: {state}")
         return state
 
-    def _clear(self):
+    def _clear(self, turn_state: EndOfTurnState):
         # Reset internal state for next turn
         logger.debug("Clearing audio buffer...")
-        self._speech_triggered = False
+        # If the state is still incomplete, keep the _speech_triggered as True
+        self._speech_triggered = turn_state == EndOfTurnState.INCOMPLETE
         self._audio_buffer = []
         self._speech_start_time = None
         self._silence_ms = 0
diff --git a/src/pipecat/transports/base_input.py b/src/pipecat/transports/base_input.py
index 7ae6d3e64..b43b71a49 100644
--- a/src/pipecat/transports/base_input.py
+++ b/src/pipecat/transports/base_input.py
@@ -221,6 +221,18 @@ class BaseInputTransport(FrameProcessor):
             await self.push_frame(UserEndOfTurnFrame())
             await self._handle_user_interruption(UserStoppedSpeakingFrame())
 
+    async def _run_turn_analyzer(self, frame: InputAudioRawFrame, vad_state: VADState, previous_vad_state: VADState):
+        is_speech = vad_state == VADState.SPEAKING or vad_state == VADState.STARTING
+        # If silence exceeds threshold, we are going to receive EndOfTurnState.COMPLETE
+        end_of_turn_state = self._params.end_of_turn_analyzer.append_audio(
+            frame.audio, is_speech
+        )
+        if end_of_turn_state == EndOfTurnState.COMPLETE:
+            await self._handle_end_of_turn_complete(end_of_turn_state)
+        # Otherwise we are going to trigger to check if the turn is completed based on the VAD
+        elif vad_state == VADState.QUIET and vad_state != previous_vad_state:
+            await self._handle_end_of_turn()
+
     async def _audio_task_handler(self):
         vad_state: VADState = VADState.QUIET
         while True:
@@ -240,16 +252,7 @@ class BaseInputTransport(FrameProcessor):
                 audio_passthrough = self._params.vad_audio_passthrough
 
             if self._params.end_of_turn_analyzer:
-                is_speech = vad_state == VADState.SPEAKING or vad_state == VADState.STARTING
-                # If silence exceeds threshold, we are going to receive EndOfTurnState.COMPLETE
-                end_of_turn_state = self._params.end_of_turn_analyzer.append_audio(
-                    frame.audio, is_speech
-                )
-                if end_of_turn_state == EndOfTurnState.COMPLETE:
-                    await self._handle_end_of_turn_complete(end_of_turn_state)
-                # Otherwise we are going to trigger to check if the turn is completed based on the VAD
-                elif vad_state == VADState.QUIET and vad_state != previous_vad_state:
-                    await self._handle_end_of_turn()
+                await self._run_turn_analyzer(frame, vad_state, previous_vad_state)
 
             # Push audio downstream if passthrough.
             if audio_passthrough: