From 20a59e8c567af81dcef98e410aaf9e7ec1246680 Mon Sep 17 00:00:00 2001
From: Mark Backman <mark@daily.co>
Date: Wed, 30 Apr 2025 10:44:31 -0400
Subject: [PATCH] Add InterimTranscriptionFrame and TranscriptionFrame to
 STTMuteFilter frame processing

---
 CHANGELOG.md                                  |  4 ++
 .../processors/filters/stt_mute_filter.py     |  4 ++
 tests/test_stt_mute_filter.py                 | 41 +++++++++++++++++++
 3 files changed, 49 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1e5bb8849..e0153d3a0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Changed
 
+- The `STTMuteFilter` now mutes `InterimTranscriptionFrame` and
+  `TranscriptionFrame`, which allows the `STTMuteFilter` to be used in
+  conjunction with transports that generate transcripts, e.g. `DailyTransport`.
+
 - Function calls now receive a single parameter `FunctionCallParams` instead of
   `(function_name, tool_call_id, args, llm, context, result_callback)` which is
   now deprecated.
diff --git a/src/pipecat/processors/filters/stt_mute_filter.py b/src/pipecat/processors/filters/stt_mute_filter.py
index ae81acc1e..e85a3e581 100644
--- a/src/pipecat/processors/filters/stt_mute_filter.py
+++ b/src/pipecat/processors/filters/stt_mute_filter.py
@@ -24,10 +24,12 @@ from pipecat.frames.frames import (
     FunctionCallInProgressFrame,
     FunctionCallResultFrame,
     InputAudioRawFrame,
+    InterimTranscriptionFrame,
     StartFrame,
     StartInterruptionFrame,
     StopInterruptionFrame,
     STTMuteFrame,
+    TranscriptionFrame,
     UserStartedSpeakingFrame,
     UserStoppedSpeakingFrame,
 )
@@ -175,6 +177,8 @@ class STTMuteFilter(FrameProcessor):
                 UserStartedSpeakingFrame,
                 UserStoppedSpeakingFrame,
                 InputAudioRawFrame,
+                InterimTranscriptionFrame,
+                TranscriptionFrame,
             ),
         ):
             # Only pass VAD-related frames when not muted
diff --git a/tests/test_stt_mute_filter.py b/tests/test_stt_mute_filter.py
index a55c4609e..f0c0d7d57 100644
--- a/tests/test_stt_mute_filter.py
+++ b/tests/test_stt_mute_filter.py
@@ -12,7 +12,9 @@ from pipecat.frames.frames import (
     FunctionCallInProgressFrame,
     FunctionCallResultFrame,
     InputAudioRawFrame,
+    InterimTranscriptionFrame,
     STTMuteFrame,
+    TranscriptionFrame,
     UserStartedSpeakingFrame,
     UserStoppedSpeakingFrame,
 )
@@ -100,6 +102,45 @@ class TestSTTMuteFilter(unittest.IsolatedAsyncioTestCase):
             expected_down_frames=expected_returned_frames,
         )
 
+    async def test_transcription_frames_with_always_strategy(self):
+        filter = STTMuteFilter(config=STTMuteConfig(strategies={STTMuteStrategy.ALWAYS}))
+
+        frames_to_send = [
+            # Bot is speaking - transcription frames sent now should be muted
+            BotStartedSpeakingFrame(),
+            SleepFrame(sleep=0.1),  # Wait for BotStartedSpeakingFrame to be processed
+            InterimTranscriptionFrame(
+                user_id="user1", text="This should be suppressed", timestamp="1234567890"
+            ),
+            TranscriptionFrame(
+                user_id="user1", text="This should be suppressed", timestamp="1234567890"
+            ),
+            SleepFrame(sleep=0.1),  # Give the transcription frames time to be handled while muted
+            BotStoppedSpeakingFrame(),
+            # Bot has stopped speaking - transcription frames should pass through
+            InterimTranscriptionFrame(
+                user_id="user1", text="This should pass", timestamp="1234567891"
+            ),
+            TranscriptionFrame(
+                user_id="user1", text="This should pass through", timestamp="1234567891"
+            ),
+        ]
+
+        expected_returned_frames = [
+            BotStartedSpeakingFrame,
+            STTMuteFrame,  # mute=True
+            BotStoppedSpeakingFrame,
+            STTMuteFrame,  # mute=False
+            InterimTranscriptionFrame,  # Sent after BotStoppedSpeakingFrame, so it passes through
+            TranscriptionFrame,  # Sent after BotStoppedSpeakingFrame, so it passes through
+        ]
+
+        await run_test(
+            filter,
+            frames_to_send=frames_to_send,
+            expected_down_frames=expected_returned_frames,
+        )
+
     # TODO: Revisit once we figure out how to test SystemFrames and DataFrames
     # async def test_function_call_strategy(self):
     #     filter = STTMuteFilter(config=STTMuteConfig(strategies={STTMuteStrategy.FUNCTION_CALL}))