Merge pull request #342 from pipecat-ai/aleix/base-output-transport-push-audio

transport(base_output): push audio downstream
2024-08-06 17:30:32 -07:00
parent d4979f5e64 881248cbd6
commit a3eb8337a6
2 changed files with 6 additions and 0 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Added

+- `AudioRawFrame`s are now pushed downstream from the base output
+  transport. This allows capturing the exact words the bot says by adding an STT
+  service at the end of the pipeline.
+
 - Added new `GStreamerPipelineSource`. This processor can generate image or
   audio frames from a GStreamer pipeline (e.g. reading an MP4 file, an RTP
  stream or anything supported by GStreamer).
--- a/src/pipecat/transports/base_output.py
+++ b/src/pipecat/transports/base_output.py
@@ -203,6 +203,7 @@ class BaseOutputTransport(FrameProcessor):
                frame = await self._sink_queue.get()
                if isinstance(frame, AudioRawFrame):
                    await self.write_raw_audio_frames(frame.audio)
+                    await self._internal_push_frame(frame)
                    await self.push_frame(BotSpeakingFrame(), FrameDirection.UPSTREAM)
                elif isinstance(frame, ImageRawFrame):
                    await self._set_camera_image(frame)
@@ -329,6 +330,7 @@ class BaseOutputTransport(FrameProcessor):
            try:
                frame = await self._audio_out_queue.get()
                await self.write_raw_audio_frames(frame.audio)
+                await self._internal_push_frame(frame)
                await self.push_frame(BotSpeakingFrame(), FrameDirection.UPSTREAM)
            except asyncio.CancelledError:
                break