Keeping the _speech_triggered as true if the state is incomplete.
This commit is contained in:
@@ -82,7 +82,7 @@ class BaseSmartTurn(ABC):
|
||||
f"End of Turn complete due to stop_secs. Silence in ms: {self._silence_ms}"
|
||||
)
|
||||
state = EndOfTurnState.COMPLETE
|
||||
self._clear()
|
||||
self._clear(state)
|
||||
else:
|
||||
# Trim buffer to prevent unbounded growth before speech
|
||||
max_buffer_time = (
|
||||
@@ -101,14 +101,15 @@ class BaseSmartTurn(ABC):
|
||||
logger.debug("Analyzing End of Turn...")
|
||||
state = self._process_speech_segment(self._audio_buffer)
|
||||
if state == EndOfTurnState.COMPLETE or USE_ONLY_LAST_VAD_SEGMENT:
|
||||
self._clear()
|
||||
self._clear(state)
|
||||
logger.debug(f"End of Turn result: {state}")
|
||||
return state
|
||||
|
||||
def _clear(self):
|
||||
def _clear(self, turn_state: EndOfTurnState):
|
||||
# Reset internal state for next turn
|
||||
logger.debug("Clearing audio buffer...")
|
||||
self._speech_triggered = False
|
||||
# If the state is still incomplete, keep the _speech_triggered as True
|
||||
self._speech_triggered = turn_state == EndOfTurnState.INCOMPLETE
|
||||
self._audio_buffer = []
|
||||
self._speech_start_time = None
|
||||
self._silence_ms = 0
|
||||
|
||||
@@ -221,6 +221,18 @@ class BaseInputTransport(FrameProcessor):
|
||||
await self.push_frame(UserEndOfTurnFrame())
|
||||
await self._handle_user_interruption(UserStoppedSpeakingFrame())
|
||||
|
||||
async def _run_turn_analyzer(self, frame: InputAudioRawFrame, vad_state: VADState, previous_vad_state: VADState):
|
||||
is_speech = vad_state == VADState.SPEAKING or vad_state == VADState.STARTING
|
||||
# If silence exceeds threshold, we are going to receive EndOfTurnState.COMPLETE
|
||||
end_of_turn_state = self._params.end_of_turn_analyzer.append_audio(
|
||||
frame.audio, is_speech
|
||||
)
|
||||
if end_of_turn_state == EndOfTurnState.COMPLETE:
|
||||
await self._handle_end_of_turn_complete(end_of_turn_state)
|
||||
# Otherwise we are going to trigger to check if the turn is completed based on the VAD
|
||||
elif vad_state == VADState.QUIET and vad_state != previous_vad_state:
|
||||
await self._handle_end_of_turn()
|
||||
|
||||
async def _audio_task_handler(self):
|
||||
vad_state: VADState = VADState.QUIET
|
||||
while True:
|
||||
@@ -240,16 +252,7 @@ class BaseInputTransport(FrameProcessor):
|
||||
audio_passthrough = self._params.vad_audio_passthrough
|
||||
|
||||
if self._params.end_of_turn_analyzer:
|
||||
is_speech = vad_state == VADState.SPEAKING or vad_state == VADState.STARTING
|
||||
# If silence exceeds threshold, we are going to receive EndOfTurnState.COMPLETE
|
||||
end_of_turn_state = self._params.end_of_turn_analyzer.append_audio(
|
||||
frame.audio, is_speech
|
||||
)
|
||||
if end_of_turn_state == EndOfTurnState.COMPLETE:
|
||||
await self._handle_end_of_turn_complete(end_of_turn_state)
|
||||
# Otherwise we are going to trigger to check if the turn is completed based on the VAD
|
||||
elif vad_state == VADState.QUIET and vad_state != previous_vad_state:
|
||||
await self._handle_end_of_turn()
|
||||
await self._run_turn_analyzer(frame, vad_state, previous_vad_state)
|
||||
|
||||
# Push audio downstream if passthrough.
|
||||
if audio_passthrough:
|
||||
|
||||
Reference in New Issue
Block a user