Fix: SimliVideoService was continuously emitting audio, preventing BotStoppedSpeakingFrame from being sent

2025-05-02 16:26:23 -04:00
parent b60691c7b2
commit eeaa9f67a1
3 changed files with 14 additions and 7 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -136,6 +136,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Fixed

+- Fixed an issue with `SimliVideoService` where the bot was continuously outputting
+  audio, which prevented the `BotStoppedSpeakingFrame` from being emitted.
+
 - Fixed an issue where `OpenAIRealtimeBetaLLMService` would add two assistant
  messages to the context.

--- a/examples/foundational/27-simli-layer.py
+++ b/examples/foundational/27-simli-layer.py
@@ -36,6 +36,7 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac
            audio_in_enabled=True,
            audio_out_enabled=True,
            video_out_enabled=True,
+            video_out_is_live=True,
            video_out_width=512,
            video_out_height=512,
            vad_analyzer=SileroVADAnalyzer(),
--- a/src/pipecat/services/simli/video.py
+++ b/src/pipecat/services/simli/video.py
@@ -64,13 +64,16 @@ class SimliVideoService(FrameProcessor):
        async for audio_frame in self._simli_client.getAudioStreamIterator():
            resampled_frames = self._pipecat_resampler.resample(audio_frame)
            for resampled_frame in resampled_frames:
-                await self.push_frame(
-                    TTSAudioRawFrame(
-                        audio=resampled_frame.to_ndarray().tobytes(),
-                        sample_rate=self._pipecat_resampler.rate,
-                        num_channels=1,
-                    ),
-                )
+                audio_array = resampled_frame.to_ndarray()
+                # Only push frame if there is audio (e.g. not silence)
+                if audio_array.any():
+                    await self.push_frame(
+                        TTSAudioRawFrame(
+                            audio=audio_array.tobytes(),
+                            sample_rate=self._pipecat_resampler.rate,
+                            num_channels=1,
+                        ),
+                    )

    async def _consume_and_process_video(self):
        await self._pipecat_resampler_event.wait()