From 12eb9437c12f6681a29df33bc90fc720b4612c87 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= <aleix@daily.co>
Date: Thu, 17 Oct 2024 15:34:14 -0700
Subject: [PATCH 1/2] services(stt): allow STT service to passthrough audio

---
 CHANGELOG.md                        | 3 +++
 src/pipecat/services/ai_services.py | 8 ++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ddc78b7e2..4dcefc96b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+- Added `audio_passthrough` parameter to `STTService`. If enabled, audio frames
+  are also pushed downstream so that other processors can use them.
+
 - Added input parameter options for `PlayHTTTSService` and
   `PlayHTHttpTTSService`.
 
diff --git a/src/pipecat/services/ai_services.py b/src/pipecat/services/ai_services.py
index 8e11ad6ee..22ceffb7b 100644
--- a/src/pipecat/services/ai_services.py
+++ b/src/pipecat/services/ai_services.py
@@ -451,8 +451,9 @@ class WordTTSService(TTSService):
 class STTService(AIService):
     """STTService is a base class for speech-to-text services."""
 
-    def __init__(self, **kwargs):
+    def __init__(self, audio_passthrough=False, **kwargs):
         super().__init__(**kwargs)
+        self._audio_passthrough = audio_passthrough
         self._settings: Dict[str, Any] = {}
 
     @abstractmethod
@@ -490,8 +491,11 @@ class STTService(AIService):
 
         if isinstance(frame, AudioRawFrame):
             # In this service we accumulate audio internally and at the end we
-            # push a TextFrame. We don't really want to push audio frames down.
+            # push a TextFrame. If audio passthrough is enabled, we also push
+            # the audio frame downstream in case someone else needs it.
             await self.process_audio_frame(frame)
+            if self._audio_passthrough:
+                await self.push_frame(frame, direction)
         elif isinstance(frame, STTUpdateSettingsFrame):
             await self._update_settings(frame.settings)
         else:

From c9318ecd5c9ab4aee03551a9626e36f5bb637ffd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= <aleix@daily.co>
Date: Thu, 17 Oct 2024 15:34:30 -0700
Subject: [PATCH 2/2] examples: minor fixes

---
 examples/foundational/07-interruptible-vad.py       | 2 +-
 examples/foundational/07c-interruptible-deepgram.py | 1 -
 examples/foundational/19-openai-realtime-beta.py    | 2 +-
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/examples/foundational/07-interruptible-vad.py b/examples/foundational/07-interruptible-vad.py
index 716e3ee03..9ec380bd0 100644
--- a/examples/foundational/07-interruptible-vad.py
+++ b/examples/foundational/07-interruptible-vad.py
@@ -9,6 +9,7 @@ import aiohttp
 import os
 import sys
 
+from pipecat.audio.vad.silero import SileroVAD
 from pipecat.frames.frames import LLMMessagesFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
@@ -20,7 +21,6 @@ from pipecat.processors.aggregators.llm_response import (
 from pipecat.services.cartesia import CartesiaTTSService
 from pipecat.services.openai import OpenAILLMService
 from pipecat.transports.services.daily import DailyParams, DailyTransport
-from pipecat.vad.silero import SileroVAD
 
 from runner import configure
 
diff --git a/examples/foundational/07c-interruptible-deepgram.py b/examples/foundational/07c-interruptible-deepgram.py
index e913005e1..d232ad973 100644
--- a/examples/foundational/07c-interruptible-deepgram.py
+++ b/examples/foundational/07c-interruptible-deepgram.py
@@ -80,7 +80,6 @@ async def main():
 
         @transport.event_handler("on_first_participant_joined")
         async def on_first_participant_joined(transport, participant):
-            transport.capture_participant_transcription(participant["id"])
             # Kick off the conversation.
             messages.append({"role": "system", "content": "Please introduce yourself to the user."})
             await task.queue_frames([LLMMessagesFrame(messages)])
diff --git a/examples/foundational/19-openai-realtime-beta.py b/examples/foundational/19-openai-realtime-beta.py
index e9cb02f23..f258f95e4 100644
--- a/examples/foundational/19-openai-realtime-beta.py
+++ b/examples/foundational/19-openai-realtime-beta.py
@@ -15,7 +15,7 @@ from loguru import logger
 from runner import configure
 
 from pipecat.audio.vad.silero import SileroVADAnalyzer
-from pipecat.vad.vad_analyzer import VADParams
+from pipecat.audio.vad.vad_analyzer import VADParams
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask