Fix duplicate / inconsistent EOU detection

This commit is contained in:
Xin Wang
2026-02-06 07:23:31 +08:00
parent da52a88006
commit 4ceb3ec96f
2 changed files with 3 additions and 15 deletions

View File

@@ -85,8 +85,8 @@ class DuplexPipeline:
# Initialize EOU detector # Initialize EOU detector
self.eou_detector = EouDetector( self.eou_detector = EouDetector(
silence_threshold_ms=600, silence_threshold_ms=settings.vad_eou_threshold_ms,
min_speech_duration_ms=200 min_speech_duration_ms=settings.vad_min_speech_duration_ms
) )
# Initialize services # Initialize services

View File

@@ -6,7 +6,6 @@ from typing import Tuple, Optional
import numpy as np import numpy as np
from loguru import logger from loguru import logger
from processors.eou import EouDetector
# Try to import onnxruntime (optional for VAD functionality) # Try to import onnxruntime (optional for VAD functionality)
try: try:
@@ -160,25 +159,19 @@ class VADProcessor:
Tracks speech/silence state and emits events on transitions. Tracks speech/silence state and emits events on transitions.
""" """
def __init__(self, vad_model: SileroVAD, threshold: float = 0.5, def __init__(self, vad_model: SileroVAD, threshold: float = 0.5):
silence_threshold_ms: int = 1000, min_speech_duration_ms: int = 250):
""" """
Initialize VAD processor. Initialize VAD processor.
Args: Args:
vad_model: Silero VAD model instance vad_model: Silero VAD model instance
threshold: Speech detection threshold threshold: Speech detection threshold
silence_threshold_ms: EOU silence threshold in ms (longer = one EOU across short pauses)
min_speech_duration_ms: EOU min speech duration in ms (ignore very short noises)
""" """
self.vad = vad_model self.vad = vad_model
self.threshold = threshold self.threshold = threshold
self._eou_silence_ms = silence_threshold_ms
self._eou_min_speech_ms = min_speech_duration_ms
self.is_speaking = False self.is_speaking = False
self.speech_start_time: Optional[float] = None self.speech_start_time: Optional[float] = None
self.silence_start_time: Optional[float] = None self.silence_start_time: Optional[float] = None
self.eou_detector = EouDetector(silence_threshold_ms, min_speech_duration_ms)
def process(self, pcm_bytes: bytes, chunk_size_ms: int = 20) -> Optional[Tuple[str, float]]: def process(self, pcm_bytes: bytes, chunk_size_ms: int = 20) -> Optional[Tuple[str, float]]:
""" """
@@ -196,10 +189,6 @@ class VADProcessor:
# Check if this is speech based on threshold # Check if this is speech based on threshold
is_speech = probability >= self.threshold is_speech = probability >= self.threshold
# Check EOU
if self.eou_detector.process("Speech" if is_speech else "Silence"):
return ("eou", probability)
# State transition: Silence -> Speech # State transition: Silence -> Speech
if is_speech and not self.is_speaking: if is_speech and not self.is_speaking:
self.is_speaking = True self.is_speaking = True
@@ -222,4 +211,3 @@ class VADProcessor:
self.is_speaking = False self.is_speaking = False
self.speech_start_time = None self.speech_start_time = None
self.silence_start_time = None self.silence_start_time = None
self.eou_detector = EouDetector(self._eou_silence_ms, self._eou_min_speech_ms)