Merge pull request #1213 from pipecat-ai/aleix/base-transport-output-bot-vad-stop-secs

BaseOutputTransport: use specific VAD stop secs for the bot
2025-02-12 19:01:56 -08:00
parent 2e87a019a8 f110a45c85
commit 68aaa1f8f4
2 changed files with 8 additions and 8 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -76,6 +76,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Fixed

+- Fixed an issue that was causing `BotStoppedSpeakingFrame` to be generated too
+  late. This could, in turn, cause `STTMuteFilter` to be unblocked later than
+  desired.
+
 - Fixed an issue that was causing `AudioBufferProcessor` to not record
  synchronized audio.

--- a/src/pipecat/transports/base_output.py
+++ b/src/pipecat/transports/base_output.py
@@ -14,7 +14,6 @@ from loguru import logger
 from PIL import Image

 from pipecat.audio.utils import create_default_resampler
-from pipecat.audio.vad.vad_analyzer import VAD_STOP_SECS
 from pipecat.frames.frames import (
    BotSpeakingFrame,
    BotStartedSpeakingFrame,
@@ -38,6 +37,8 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 from pipecat.transports.base_transport import TransportParams
 from pipecat.utils.time import nanoseconds_to_seconds

+BOT_VAD_STOP_SECS = 0.3
+

 class BaseOutputTransport(FrameProcessor):
    def __init__(self, params: TransportParams, **kwargs):
@@ -321,15 +322,10 @@ class BaseOutputTransport(FrameProcessor):
                    )
                    yield frame

-        vad_stop_secs = (
-            self._params.vad_analyzer.params.stop_secs
-            if self._params.vad_analyzer
-            else VAD_STOP_SECS
-        )
        if self._params.audio_out_mixer:
-            return with_mixer(vad_stop_secs)
+            return with_mixer(BOT_VAD_STOP_SECS)
        else:
-            return without_mixer(vad_stop_secs)
+            return without_mixer(BOT_VAD_STOP_SECS)

    async def _sink_task_handler(self):
        async for frame in self._next_frame():