diff --git a/CHANGELOG.md b/CHANGELOG.md index 845c6b576..bef74dc06 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -76,6 +76,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Fixed an issue that was causing `BotStoppedSpeakingFrame` to be generated too + late. This could in turn cause `STTMuteFilter` to be unblocked later than + desired. + - Fixed an issue that was causing `AudioBufferProcessor` to not record synchronized audio. diff --git a/src/pipecat/transports/base_output.py b/src/pipecat/transports/base_output.py index 22cdff8ed..6bf4a9ff3 100644 --- a/src/pipecat/transports/base_output.py +++ b/src/pipecat/transports/base_output.py @@ -14,7 +14,6 @@ from loguru import logger from PIL import Image from pipecat.audio.utils import create_default_resampler -from pipecat.audio.vad.vad_analyzer import VAD_STOP_SECS from pipecat.frames.frames import ( BotSpeakingFrame, BotStartedSpeakingFrame, @@ -38,6 +37,8 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.transports.base_transport import TransportParams from pipecat.utils.time import nanoseconds_to_seconds +BOT_VAD_STOP_SECS = 0.3 + class BaseOutputTransport(FrameProcessor): def __init__(self, params: TransportParams, **kwargs): @@ -321,15 +322,10 @@ class BaseOutputTransport(FrameProcessor): ) yield frame - vad_stop_secs = ( - self._params.vad_analyzer.params.stop_secs - if self._params.vad_analyzer - else VAD_STOP_SECS - ) if self._params.audio_out_mixer: - return with_mixer(vad_stop_secs) + return with_mixer(BOT_VAD_STOP_SECS) else: - return without_mixer(vad_stop_secs) + return without_mixer(BOT_VAD_STOP_SECS) async def _sink_task_handler(self): async for frame in self._next_frame():