Compare commits
1 Commits
hush/realt
...
mb/stt-dir
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8ac6ac5efb |
@@ -9,6 +9,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
### Added
|
||||
|
||||
- Added two new frames, `RequestSTTMuteFrame` and `RequestSTTUnmuteFrame`. These
|
||||
frames tell the `STTMuteFilter` to directly mute or unmute the user and
|
||||
take precedence over the mute strategies when a `RequestSTTMuteFrame` is
|
||||
processed.
|
||||
|
||||
- `BaseOutputTransport` now allows multiple destinations if the transport
|
||||
implementation supports it (e.g. Daily's custom tracks). With multiple
|
||||
destinations it is possible to send different audio or video tracks with a
|
||||
|
||||
@@ -695,6 +695,20 @@ class STTMuteFrame(SystemFrame):
|
||||
mute: bool
|
||||
|
||||
|
||||
@dataclass
class RequestSTTMuteFrame(Frame):
    """Frame asking for the STT service to be muted.

    The frame carries no fields. When `STTMuteFilter` processes it, the
    filter records that a mute was explicitly requested, and that request is
    checked before any configured mute strategy.
    """
|
||||
|
||||
|
||||
@dataclass
class RequestSTTUnmuteFrame(Frame):
    """Frame asking for the STT service to be unmuted.

    The frame carries no fields. When `STTMuteFilter` processes it, the
    filter clears any explicitly requested mute and re-evaluates whether it
    should still be muted.
    """
|
||||
|
||||
|
||||
@dataclass
|
||||
class TransportMessageUrgentFrame(SystemFrame):
|
||||
message: Any
|
||||
|
||||
@@ -25,6 +25,8 @@ from pipecat.frames.frames import (
|
||||
FunctionCallResultFrame,
|
||||
InputAudioRawFrame,
|
||||
InterimTranscriptionFrame,
|
||||
RequestSTTMuteFrame,
|
||||
RequestSTTUnmuteFrame,
|
||||
StartFrame,
|
||||
StartInterruptionFrame,
|
||||
StopInterruptionFrame,
|
||||
@@ -101,6 +103,7 @@ class STTMuteFilter(FrameProcessor):
|
||||
self._bot_is_speaking = False
|
||||
self._function_call_in_progress = False
|
||||
self._is_muted = False # Initialize as unmuted, will set state on StartFrame if needed
|
||||
self._frame_requested_mute = False
|
||||
|
||||
@property
|
||||
def is_muted(self) -> bool:
|
||||
@@ -116,6 +119,10 @@ class STTMuteFilter(FrameProcessor):
|
||||
|
||||
async def _should_mute(self) -> bool:
|
||||
"""Determines if STT should be muted based on current state and strategy."""
|
||||
# First check if a RequestSTTMuteFrame was received
|
||||
if self._frame_requested_mute:
|
||||
return True
|
||||
|
||||
for strategy in self._config.strategies:
|
||||
match strategy:
|
||||
case STTMuteStrategy.FUNCTION_CALL:
|
||||
@@ -151,7 +158,13 @@ class STTMuteFilter(FrameProcessor):
|
||||
should_mute = None
|
||||
|
||||
# Process frames to determine mute state
|
||||
if isinstance(frame, StartFrame):
|
||||
if isinstance(frame, RequestSTTMuteFrame):
|
||||
self._frame_requested_mute = True
|
||||
should_mute = await self._should_mute()
|
||||
elif isinstance(frame, RequestSTTUnmuteFrame):
|
||||
self._frame_requested_mute = False
|
||||
should_mute = await self._should_mute()
|
||||
elif isinstance(frame, StartFrame):
|
||||
should_mute = await self._should_mute()
|
||||
elif isinstance(frame, FunctionCallInProgressFrame):
|
||||
self._function_call_in_progress = True
|
||||
|
||||
@@ -287,3 +287,50 @@ class TestSTTMuteFilter(unittest.IsolatedAsyncioTestCase):
|
||||
frames_to_send=frames_to_send,
|
||||
expected_down_frames=expected_returned_frames,
|
||||
)
|
||||
|
||||
async def test_direct_frame_muting(self):
    """Check that mute/unmute request frames alone toggle the filter.

    The filter is built with an empty strategy set, so any suppression seen
    here can only come from `RequestSTTMuteFrame` / `RequestSTTUnmuteFrame`.
    """
    from pipecat.frames.frames import RequestSTTMuteFrame, RequestSTTUnmuteFrame

    def empty_audio():
        # Zero-length audio payload; only the frame type matters for this test.
        return InputAudioRawFrame(audio=b"", sample_rate=16000, num_channels=1)

    def user_turn():
        # One complete user utterance: start, audio, stop (built in that order).
        return [UserStartedSpeakingFrame(), empty_audio(), UserStoppedSpeakingFrame()]

    # No strategies configured, isolating direct frame control.
    mute_filter = STTMuteFilter(config=STTMuteConfig(strategies=set()))

    frames_to_send = [
        # Initially unmuted - this turn should pass through.
        *user_turn(),
        # Mute via frame - the following turn should be suppressed.
        RequestSTTMuteFrame(),
        SleepFrame(sleep=0.1),
        *user_turn(),
        # Unmute via frame - this turn should pass through again.
        RequestSTTUnmuteFrame(),
        SleepFrame(sleep=0.1),
        *user_turn(),
    ]

    forwarded_turn = [
        UserStartedSpeakingFrame,
        InputAudioRawFrame,
        UserStoppedSpeakingFrame,
    ]
    expected_returned_frames = [
        *forwarded_turn,
        STTMuteFrame,  # mute=True
        RequestSTTMuteFrame,
        STTMuteFrame,  # mute=False
        RequestSTTUnmuteFrame,
        *forwarded_turn,
    ]

    await run_test(
        mute_filter,
        frames_to_send=frames_to_send,
        expected_down_frames=expected_returned_frames,
    )
|
||||
|
||||
Reference in New Issue
Block a user