Keep `_speech_triggered` set to True while the end-of-turn state is still incomplete.

This commit is contained in:
Filipi Fuchter
2025-04-17 16:46:15 -03:00
parent a80f82cdb6
commit 3ea9cfd251
2 changed files with 18 additions and 14 deletions

View File

@@ -82,7 +82,7 @@ class BaseSmartTurn(ABC):
f"End of Turn complete due to stop_secs. Silence in ms: {self._silence_ms}"
)
state = EndOfTurnState.COMPLETE
self._clear()
self._clear(state)
else:
# Trim buffer to prevent unbounded growth before speech
max_buffer_time = (
@@ -101,14 +101,15 @@ class BaseSmartTurn(ABC):
logger.debug("Analyzing End of Turn...")
state = self._process_speech_segment(self._audio_buffer)
if state == EndOfTurnState.COMPLETE or USE_ONLY_LAST_VAD_SEGMENT:
self._clear()
self._clear(state)
logger.debug(f"End of Turn result: {state}")
return state
def _clear(self):
def _clear(self, turn_state: EndOfTurnState):
# Reset internal state for next turn
logger.debug("Clearing audio buffer...")
self._speech_triggered = False
# If the state is still incomplete, keep the _speech_triggered as True
self._speech_triggered = turn_state == EndOfTurnState.INCOMPLETE
self._audio_buffer = []
self._speech_start_time = None
self._silence_ms = 0

View File

@@ -221,6 +221,18 @@ class BaseInputTransport(FrameProcessor):
await self.push_frame(UserEndOfTurnFrame())
await self._handle_user_interruption(UserStoppedSpeakingFrame())
async def _run_turn_analyzer(self, frame: InputAudioRawFrame, vad_state: VADState, previous_vad_state: VADState):
    """Feed one audio frame to the end-of-turn analyzer and act on the result.

    Args:
        frame: Audio frame whose ``audio`` payload is appended to the analyzer.
        vad_state: VAD state associated with this frame.
        previous_vad_state: VAD state before this frame; compared against
            ``vad_state`` to detect a fresh transition into QUIET.
    """
    # STARTING and SPEAKING both count as speech for the analyzer.
    speech_detected = vad_state == VADState.SPEAKING or vad_state == VADState.STARTING

    analyzer = self._params.end_of_turn_analyzer
    # The analyzer may report COMPLETE by itself here (e.g. once silence has
    # exceeded its threshold), without any further analysis being requested.
    turn_state = analyzer.append_audio(frame.audio, speech_detected)

    if turn_state == EndOfTurnState.COMPLETE:
        await self._handle_end_of_turn_complete(turn_state)
        return

    # Otherwise only ask for an end-of-turn check when the VAD has just
    # switched to QUIET, not on every quiet frame.
    just_went_quiet = vad_state == VADState.QUIET and vad_state != previous_vad_state
    if just_went_quiet:
        await self._handle_end_of_turn()
async def _audio_task_handler(self):
vad_state: VADState = VADState.QUIET
while True:
@@ -240,16 +252,7 @@ class BaseInputTransport(FrameProcessor):
audio_passthrough = self._params.vad_audio_passthrough
if self._params.end_of_turn_analyzer:
is_speech = vad_state == VADState.SPEAKING or vad_state == VADState.STARTING
# If silence exceeds threshold, we are going to receive EndOfTurnState.COMPLETE
end_of_turn_state = self._params.end_of_turn_analyzer.append_audio(
frame.audio, is_speech
)
if end_of_turn_state == EndOfTurnState.COMPLETE:
await self._handle_end_of_turn_complete(end_of_turn_state)
# Otherwise we are going to trigger to check if the turn is completed based on the VAD
elif vad_state == VADState.QUIET and vad_state != previous_vad_state:
await self._handle_end_of_turn()
await self._run_turn_analyzer(frame, vad_state, previous_vad_state)
# Push audio downstream if passthrough.
if audio_passthrough: