Files
py-active-call/processors/eou.py
2026-01-29 13:57:12 +08:00

81 lines
2.8 KiB
Python

"""End-of-Utterance Detection."""
import time
from typing import Optional
class EouDetector:
    """
    End-of-utterance (EOU) detector driven by VAD status updates.

    Fires EOU only after continuous silence lasting silence_threshold_ms.
    Short pauses between sentences do not trigger EOU because any speech
    resets the silence timer, so at most one EOU is emitted per turn.

    All timestamps held in the state attributes are in seconds and come from
    whichever clock feeds process(): time.monotonic() by default, or the
    caller-supplied ``now`` values.
    """

    def __init__(self, silence_threshold_ms: int = 1000, min_speech_duration_ms: int = 250):
        """
        Initialize EOU detector.

        Args:
            silence_threshold_ms: How long silence must last to trigger EOU (default 1000ms)
            min_speech_duration_ms: Minimum speech duration to consider valid (default 250ms)
        """
        # Thresholds converted once to seconds, the unit used for all timing below.
        self.threshold = silence_threshold_ms / 1000.0
        self.min_speech = min_speech_duration_ms / 1000.0
        # Original millisecond values are kept alongside the converted ones.
        self._silence_threshold_ms = silence_threshold_ms
        self._min_speech_duration_ms = min_speech_duration_ms
        # State
        self.is_speaking = False  # True while inside a speech turn
        self.speech_start_time = 0.0  # timestamp of the first "Speech" of the turn
        self.silence_start_time: Optional[float] = None  # start of the current silence run
        self.triggered = False  # True once EOU has fired; cleared when a new turn starts

    def process(self, vad_status: str, now: Optional[float] = None) -> bool:
        """
        Process VAD status and detect end of utterance.

        Short breaks between phrases reset the silence clock when speech
        resumes, so only one EOU is emitted after the user truly stops.

        Args:
            vad_status: "Speech" or "Silence" (from VAD). Any other value is
                ignored and yields False without touching the state.
            now: Optional timestamp in seconds for this update. When omitted,
                time.monotonic() is used. If supplied, every call on the same
                detector should use the same non-decreasing time base (for
                example the audio stream position); do not mix explicit
                timestamps with the default clock within one turn.

        Returns:
            True if EOU detected, False otherwise.
        """
        if now is None:
            # Monotonic clock: elapsed-time measurements must not be affected
            # by wall-clock adjustments (NTP sync, manual changes).
            now = time.monotonic()

        if vad_status == "Speech":
            if not self.is_speaking:
                # First speech of a new turn.
                self.is_speaking = True
                self.speech_start_time = now
                self.triggered = False
            # Any speech resets silence timer — short pause + more speech = one utterance
            self.silence_start_time = None
            return False

        if vad_status == "Silence":
            if not self.is_speaking:
                # Silence outside of a turn: nothing to end.
                return False

            if self.silence_start_time is None:
                self.silence_start_time = now

            # Speech length is measured from the first speech of the turn to
            # the start of the current silence run.
            speech_duration = self.silence_start_time - self.speech_start_time
            if speech_duration < self.min_speech:
                # Too short to count as an utterance: drop the turn, no EOU.
                self.is_speaking = False
                self.silence_start_time = None
                return False

            silence_duration = now - self.silence_start_time
            if silence_duration >= self.threshold and not self.triggered:
                self.triggered = True
                self.is_speaking = False
                self.silence_start_time = None
                return True

        return False

    def reset(self) -> None:
        """Reset EOU detector state (configured thresholds are kept)."""
        self.is_speaking = False
        self.speech_start_time = 0.0
        self.silence_start_time = None
        self.triggered = False