diff --git a/data/response.wav b/data/response.wav
new file mode 100644
index 0000000..ecf1f9a
Binary files /dev/null and b/data/response.wav differ
diff --git a/processors/vad.py b/processors/vad.py
index 65bb9ce..1c938fa 100644
--- a/processors/vad.py
+++ b/processors/vad.py
@@ -82,8 +82,20 @@ class SileroVAD:
             Tuple of (label, probability) where label is "Speech" or "Silence"
         """
         if self.session is None or not ONNX_AVAILABLE:
-            # If model not loaded or onnxruntime not available, assume speech
-            return "Speech", 1.0
+            # Fallback energy-based VAD when model isn't available.
+            # Map RMS energy to a pseudo-probability so the existing threshold works.
+            if not pcm_bytes:
+                return "Silence", 0.0
+            audio_int16 = np.frombuffer(pcm_bytes, dtype=np.int16)
+            if audio_int16.size == 0:
+                return "Silence", 0.0
+            audio_float = audio_int16.astype(np.float32) / 32768.0
+            rms = float(np.sqrt(np.mean(audio_float * audio_float)))
+            # Typical speech RMS is ~0.02-0.05 once int16 samples are scaled to [-1.0, 1.0).
+            # Dividing by 0.05 puts the 0.5 cutoff at 0.025 RMS; min() saturates at 1.0.
+            probability = min(1.0, rms / 0.05)
+            label = "Speech" if probability >= 0.5 else "Silence"
+            return label, probability
 
         # Convert bytes to numpy array of int16
         audio_int16 = np.frombuffer(pcm_bytes, dtype=np.int16)