From 222ccbb471193fd4bc404378ecca8f72d02f2d08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Sun, 14 Dec 2025 12:27:53 -0800 Subject: [PATCH] SegmentedSTTService: use VAD user started/stopped speaking frames --- src/pipecat/services/stt_service.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/pipecat/services/stt_service.py b/src/pipecat/services/stt_service.py index f81848415..6c5741bfe 100644 --- a/src/pipecat/services/stt_service.py +++ b/src/pipecat/services/stt_service.py @@ -20,8 +20,8 @@ from pipecat.frames.frames import ( StartFrame, STTMuteFrame, STTUpdateSettingsFrame, - UserStartedSpeakingFrame, - UserStoppedSpeakingFrame, + VADUserStartedSpeakingFrame, + VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.ai_service import AIService @@ -252,20 +252,15 @@ class SegmentedSTTService(STTService): """Process frames, handling VAD events and audio segmentation.""" await super().process_frame(frame, direction) - if isinstance(frame, UserStartedSpeakingFrame): + if isinstance(frame, VADUserStartedSpeakingFrame): await self._handle_user_started_speaking(frame) - elif isinstance(frame, UserStoppedSpeakingFrame): + elif isinstance(frame, VADUserStoppedSpeakingFrame): await self._handle_user_stopped_speaking(frame) - async def _handle_user_started_speaking(self, frame: UserStartedSpeakingFrame): - if frame.emulated: - return + async def _handle_user_started_speaking(self, frame: VADUserStartedSpeakingFrame): self._user_speaking = True - async def _handle_user_stopped_speaking(self, frame: UserStoppedSpeakingFrame): - if frame.emulated: - return - + async def _handle_user_stopped_speaking(self, frame: VADUserStoppedSpeakingFrame): self._user_speaking = False content = io.BytesIO()