Integrate eou and vad
This commit is contained in:
80
processors/eou.py
Normal file
80
processors/eou.py
Normal file
@@ -0,0 +1,80 @@
|
||||
"""End-of-Utterance Detection."""
|
||||
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class EouDetector:
    """
    End-of-utterance (EOU) detector driven by per-frame VAD labels.

    Fires EOU only after continuous silence lasting at least
    ``silence_threshold_ms``. Short pauses between sentences do not trigger
    EOU because any speech frame resets the silence timer, so at most one
    EOU is emitted per turn.

    All timestamps held by the detector come from a monotonic clock (or from
    the ``now`` values supplied to :meth:`process`); they are only meaningful
    relative to each other, not as wall-clock times.
    """

    def __init__(self, silence_threshold_ms: int = 1000, min_speech_duration_ms: int = 250):
        """
        Initialize EOU detector.

        Args:
            silence_threshold_ms: How long silence must last to trigger EOU (default 1000ms)
            min_speech_duration_ms: Minimum speech duration to consider valid (default 250ms)
        """
        # Durations are kept in seconds so they compare directly with clock readings.
        self.threshold = silence_threshold_ms / 1000.0
        self.min_speech = min_speech_duration_ms / 1000.0
        self._silence_threshold_ms = silence_threshold_ms
        self._min_speech_duration_ms = min_speech_duration_ms

        # State
        self.is_speaking = False
        self.speech_start_time = 0.0
        self.silence_start_time: Optional[float] = None
        self.triggered = False

    def process(self, vad_status: str, now: Optional[float] = None) -> bool:
        """
        Process one VAD status and detect end of utterance.

        Short breaks between phrases reset the silence clock when speech
        resumes, so only one EOU is emitted after the user truly stops.

        Args:
            vad_status: "Speech" or "Silence" (from VAD). Any other value is
                ignored and yields False.
            now: Timestamp of this frame in seconds. Defaults to
                ``time.monotonic()``. Pass explicit values (e.g. audio-stream
                time) to drive the detector from a different clock; all calls
                on one instance must use the same time base.

        Returns:
            True if EOU is detected on this call, False otherwise.
        """
        if now is None:
            # Monotonic clock: elapsed-time math must not be affected by
            # wall-clock adjustments (NTP steps, manual changes).
            now = time.monotonic()

        if vad_status == "Speech":
            if not self.is_speaking:
                # First speech frame of a new utterance.
                self.is_speaking = True
                self.speech_start_time = now
                self.triggered = False
            # Any speech resets silence timer — short pause + more speech = one utterance
            self.silence_start_time = None
            return False

        if vad_status == "Silence":
            if not self.is_speaking:
                # Silence outside an utterance: nothing to end.
                return False

            if self.silence_start_time is None:
                self.silence_start_time = now

            # Speech span runs from utterance start to the start of the
            # current silence run; too-short spans are discarded as noise.
            speech_duration = self.silence_start_time - self.speech_start_time
            if speech_duration < self.min_speech:
                self.is_speaking = False
                self.silence_start_time = None
                return False

            silence_duration = now - self.silence_start_time
            if silence_duration >= self.threshold and not self.triggered:
                self.triggered = True
                self.is_speaking = False
                self.silence_start_time = None
                return True

        return False

    def reset(self) -> None:
        """Reset EOU detector state."""
        self.is_speaking = False
        self.speech_start_time = 0.0
        self.silence_start_time = None
        self.triggered = False
|
||||
Reference in New Issue
Block a user