Remove deprecated vad_events and should_interrupt from DeepgramSTTService

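Deepgram's built-in VAD events were deprecated in 0.0.99 in favor of Silero VAD. This commit removes vad_events from DeepgramSTTSettings and LiveOptions, the should_interrupt parameter, the vad_enabled property, and the _on_speech_started/_on_utterance_end handlers together with the on_speech_started/on_utterance_end events they fired. _on_message and process_frame are simplified accordingly: _on_message no longer handles ListenV1SpeechStarted/ListenV1UtteranceEnd messages, and process_frame now starts metrics on every VADUserStartedSpeakingFrame.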
2026-04-02 22:05:49 -04:00
parent 60933b7a56
commit 41e46ee69e
2 changed files with 2 additions and 79 deletions
--- a/src/pipecat/services/deepgram/stt.py
+++ b/src/pipecat/services/deepgram/stt.py
@@ -19,8 +19,6 @@ from pipecat.frames.frames import (
    InterimTranscriptionFrame,
    StartFrame,
    TranscriptionFrame,
-    UserStartedSpeakingFrame,
-    UserStoppedSpeakingFrame,
    VADUserStartedSpeakingFrame,
    VADUserStoppedSpeakingFrame,
 )
@@ -45,8 +43,6 @@ try:
        ListenV1Finalize,
        ListenV1KeepAlive,
        ListenV1Results,
-        ListenV1SpeechStarted,
-        ListenV1UtteranceEnd,
    )
 except ModuleNotFoundError as e:
    logger.error(f"Exception: {e}")
@@ -94,7 +90,6 @@ class LiveOptions:
        smart_format: Optional[bool] = None,
        tag: Optional[Any] = None,
        utterance_end_ms: Optional[int] = None,
-        vad_events: Optional[bool] = None,
        version: Optional[str] = None,
        **kwargs,
    ):
@@ -127,7 +122,6 @@ class LiveOptions:
            smart_format: Apply smart formatting to transcripts.
            tag: Custom billing tag (str or list of str).
            utterance_end_ms: Silence duration in ms before an utterance-end event.
-            vad_events: Enable Deepgram VAD speech-started / utterance-end events.
            version: Model version (e.g. ``"latest"``).
            **kwargs: Any additional Deepgram query parameters.
        """
@@ -157,7 +151,6 @@ class LiveOptions:
        self.smart_format = smart_format
        self.tag = tag
        self.utterance_end_ms = utterance_end_ms
-        self.vad_events = vad_events
        self.version = version
        self._extra = kwargs

@@ -201,7 +194,6 @@ class DeepgramSTTSettings(STTSettings):
        search: Search terms to highlight (str or list of str).
        smart_format: Apply smart formatting to transcripts.
        utterance_end_ms: Silence duration in ms before an utterance-end event.
-        vad_events: Enable Deepgram VAD speech-started / utterance-end events.
    """

    detect_entities: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
@@ -219,7 +211,6 @@ class DeepgramSTTSettings(STTSettings):
    search: Any | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
    smart_format: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
    utterance_end_ms: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
-    vad_events: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN)

    def _sync_extra_to_fields(self) -> None:
        """Sync values from extra dict to declared fields.
@@ -294,17 +285,6 @@ class DeepgramSTTService(STTService):

    Provides real-time speech recognition using Deepgram's WebSocket API.
    Supports configurable models, languages, and various audio processing options.
-
-    Event handlers available (in addition to STTService events):
-
-    - on_speech_started(service): Deepgram detected start of speech
-    - on_utterance_end(service): Deepgram detected end of utterance
-
-    Example::
-
-        @stt.event_handler("on_speech_started")
-        async def on_speech_started(service):
-            ...
    """

    Settings = DeepgramSTTSettings
@@ -325,7 +305,6 @@ class DeepgramSTTService(STTService):
        mip_opt_out: Optional[bool] = None,
        live_options: Optional[LiveOptions] = None,
        addons: Optional[dict] = None,
-        should_interrupt: bool = True,
        settings: Optional[Settings] = None,
        ttfs_p99_latency: Optional[float] = DEEPGRAM_TTFS_P99,
        **kwargs,
@@ -352,21 +331,12 @@ class DeepgramSTTService(STTService):
                    fields and direct init parameters for connection-level config.

            addons: Additional Deepgram features to enable.
-            should_interrupt: Whether to interrupt the bot when Deepgram VAD
-                detects the user is speaking.
-
-                .. deprecated:: 0.0.99
-                    This parameter will be removed along with `vad_events` support.
-
            settings: Runtime-updatable settings. When provided alongside
                ``live_options``, ``settings`` values take precedence (applied
                after the ``live_options`` merge).
            ttfs_p99_latency: P99 latency from speech end to final transcript in seconds.
                Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark
            **kwargs: Additional arguments passed to the parent STTService.
-
-        Note:
-            The `vad_events` option in LiveOptions is deprecated as of version 0.0.99 and will be removed in a future version. Please use the Silero VAD instead.
        """
        # 1. Initialize default_settings with hardcoded defaults
        default_settings = self.Settings(
@@ -387,7 +357,6 @@ class DeepgramSTTService(STTService):
            search=None,
            smart_format=False,
            utterance_end_ms=None,
-            vad_events=False,
        )

        # 2. (No step 2, as there are no deprecated direct args)
@@ -444,7 +413,6 @@ class DeepgramSTTService(STTService):
        )

        self._addons = addons
-        self._should_interrupt = should_interrupt
        self._encoding = encoding
        self._channels = channels
        self._multichannel = multichannel
@@ -453,18 +421,6 @@ class DeepgramSTTService(STTService):
        self._tag = tag
        self._mip_opt_out = mip_opt_out

-        if self._settings.vad_events:
-            import warnings
-
-            with warnings.catch_warnings():
-                warnings.simplefilter("always")
-                warnings.warn(
-                    "The 'vad_events' parameter is deprecated and will be removed in a future version. "
-                    "Please use the Silero VAD instead.",
-                    DeprecationWarning,
-                    stacklevel=2,
-                )
-
        # Build client - support optional custom base URL via DeepgramClientEnvironment
        if base_url:
            try:
@@ -488,19 +444,6 @@ class DeepgramSTTService(STTService):
        self._connection = None
        self._connection_task = None

-        if self.vad_enabled:
-            self._register_event_handler("on_speech_started")
-            self._register_event_handler("on_utterance_end")
-
-    @property
-    def vad_enabled(self):
-        """Check if Deepgram VAD events are enabled.
-
-        Returns:
-            True if VAD events are enabled in the current settings.
-        """
-        return self._settings.vad_events
-
    def can_generate_metrics(self) -> bool:
        """Check if this service can generate processing metrics.

@@ -705,17 +648,6 @@ class DeepgramSTTService(STTService):
        # Reconnection is handled automatically by the retry loop in
        # _connection_handler once start_listening() exits after the error.

-    async def _on_speech_started(self, message):
-        await self._start_metrics()
-        await self._call_event_handler("on_speech_started", message)
-        await self.broadcast_frame(UserStartedSpeakingFrame)
-        if self._should_interrupt:
-            await self.broadcast_interruption()
-
-    async def _on_utterance_end(self, message):
-        await self._call_event_handler("on_utterance_end", message)
-        await self.broadcast_frame(UserStoppedSpeakingFrame)
-
    @traced_stt
    async def _handle_transcription(
        self, transcript: str, is_final: bool, language: Optional[Language] = None
@@ -724,13 +656,7 @@ class DeepgramSTTService(STTService):
        pass

    async def _on_message(self, message):
-        if isinstance(message, ListenV1SpeechStarted):
-            if self.vad_enabled:
-                await self._on_speech_started(message)
-        elif isinstance(message, ListenV1UtteranceEnd):
-            if self.vad_enabled:
-                await self._on_utterance_end(message)
-        elif isinstance(message, ListenV1Results):
+        if isinstance(message, ListenV1Results):
            if not message.channel or len(message.channel.alternatives) == 0:
                return
            is_final = message.is_final
@@ -778,8 +704,7 @@ class DeepgramSTTService(STTService):
        """
        await super().process_frame(frame, direction)

-        if isinstance(frame, VADUserStartedSpeakingFrame) and not self.vad_enabled:
-            # Start metrics if Deepgram VAD is disabled & pipeline VAD has detected speech
+        if isinstance(frame, VADUserStartedSpeakingFrame):
            await self._start_metrics()
        elif isinstance(frame, VADUserStoppedSpeakingFrame):
            # https://developers.deepgram.com/docs/finalize
--- a/tests/test_settings.py
+++ b/tests/test_settings.py
@@ -334,7 +334,6 @@ class TestDeepgramSTTSettingsApplyUpdate:
            smart_format=False,
            punctuate=True,
            profanity_filter=True,
-            vad_events=False,
        )
        defaults.update(kwargs)
        return DeepgramSTTSettings(**defaults)
@@ -430,7 +429,6 @@ class TestDeepgramSTTSettingsFromMapping:
            interim_results=True,
            punctuate=True,
            profanity_filter=True,
-            vad_events=False,
        )

        raw = {"punctuate": False, "diarize": True}