SegmentedSTTService: use VAD user started/stopped speaking frames

2025-12-14 12:27:53 -08:00
parent 49ebe34599
commit 222ccbb471
1 changed file with 6 additions and 11 deletions
--- a/src/pipecat/services/stt_service.py
+++ b/src/pipecat/services/stt_service.py
@@ -20,8 +20,8 @@ from pipecat.frames.frames import (
    StartFrame,
    STTMuteFrame,
    STTUpdateSettingsFrame,
-    UserStartedSpeakingFrame,
-    UserStoppedSpeakingFrame,
+    VADUserStartedSpeakingFrame,
+    VADUserStoppedSpeakingFrame,
 )
 from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.ai_service import AIService
@@ -252,20 +252,15 @@ class SegmentedSTTService(STTService):
        """Process frames, handling VAD events and audio segmentation."""
        await super().process_frame(frame, direction)

-        if isinstance(frame, UserStartedSpeakingFrame):
+        if isinstance(frame, VADUserStartedSpeakingFrame):
            await self._handle_user_started_speaking(frame)
-        elif isinstance(frame, UserStoppedSpeakingFrame):
+        elif isinstance(frame, VADUserStoppedSpeakingFrame):
            await self._handle_user_stopped_speaking(frame)

-    async def _handle_user_started_speaking(self, frame: UserStartedSpeakingFrame):
-        if frame.emulated:
-            return
+    async def _handle_user_started_speaking(self, frame: VADUserStartedSpeakingFrame):
        self._user_speaking = True

-    async def _handle_user_stopped_speaking(self, frame: UserStoppedSpeakingFrame):
-        if frame.emulated:
-            return
-
+    async def _handle_user_stopped_speaking(self, frame: VADUserStoppedSpeakingFrame):
        self._user_speaking = False

        content = io.BytesIO()