Fix duplicate / inconsistent EOU events

2026-02-06 07:23:31 +08:00
parent da52a88006
commit 4ceb3ec96f
2 changed files with 3 additions and 15 deletions
--- a/processors/vad.py
+++ b/processors/vad.py
@@ -6,7 +6,6 @@ from typing import Tuple, Optional
 import numpy as np
 from loguru import logger

-from processors.eou import EouDetector

 # Try to import onnxruntime (optional for VAD functionality)
 try:
@@ -160,25 +159,19 @@ class VADProcessor:
    Tracks speech/silence state and emits events on transitions.
    """

-    def __init__(self, vad_model: SileroVAD, threshold: float = 0.5,
-                 silence_threshold_ms: int = 1000, min_speech_duration_ms: int = 250):
+    def __init__(self, vad_model: SileroVAD, threshold: float = 0.5):
        """
        Initialize VAD processor.

        Args:
            vad_model: Silero VAD model instance
            threshold: Speech detection threshold
-            silence_threshold_ms: EOU silence threshold in ms (longer = one EOU across short pauses)
-            min_speech_duration_ms: EOU min speech duration in ms (ignore very short noises)
        """
        self.vad = vad_model
        self.threshold = threshold
-        self._eou_silence_ms = silence_threshold_ms
-        self._eou_min_speech_ms = min_speech_duration_ms
        self.is_speaking = False
        self.speech_start_time: Optional[float] = None
        self.silence_start_time: Optional[float] = None
-        self.eou_detector = EouDetector(silence_threshold_ms, min_speech_duration_ms)

    def process(self, pcm_bytes: bytes, chunk_size_ms: int = 20) -> Optional[Tuple[str, float]]:
        """
@@ -196,10 +189,6 @@ class VADProcessor:
        # Check if this is speech based on threshold
        is_speech = probability >= self.threshold
        
-        # Check EOU
-        if self.eou_detector.process("Speech" if is_speech else "Silence"):
-            return ("eou", probability)
-
        # State transition: Silence -> Speech
        if is_speech and not self.is_speaking:
            self.is_speaking = True
@@ -222,4 +211,3 @@ class VADProcessor:
        self.is_speaking = False
        self.speech_start_time = None
        self.silence_start_time = None
-        self.eou_detector = EouDetector(self._eou_silence_ms, self._eou_min_speech_ms)