From 9d9f10ae0e05348a2502bb08e82b2ac76a4fd390 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Mon, 8 Sep 2025 19:07:25 -0700 Subject: [PATCH] frames: StartInterruptionFrame is deprecated, use InterruptionFrame --- CHANGELOG.md | 2 ++ .../07c-interruptible-deepgram-vad.py | 4 +-- .../07s-interruptible-google-audio-in.py | 6 ++-- .../22b-natural-conversation-proposal.py | 6 ++-- .../22c-natural-conversation-mixed-llms.py | 6 ++-- .../22d-natural-conversation-gemini-audio.py | 4 +-- examples/foundational/30-observer.py | 4 +-- src/pipecat/frames/frames.py | 30 ++++++++++++++++++- .../processors/aggregators/llm_response.py | 8 ++--- .../aggregators/llm_response_universal.py | 6 ++-- .../processors/filters/stt_mute_filter.py | 4 +-- src/pipecat/processors/frame_processor.py | 4 +-- .../processors/transcript_processor.py | 4 +-- src/pipecat/serializers/exotel.py | 4 +-- src/pipecat/serializers/plivo.py | 4 +-- src/pipecat/serializers/telnyx.py | 4 +-- src/pipecat/serializers/twilio.py | 4 +-- src/pipecat/services/asyncai/tts.py | 4 +-- .../services/aws_nova_sonic/context.py | 4 +-- src/pipecat/services/cartesia/tts.py | 4 +-- src/pipecat/services/elevenlabs/tts.py | 8 ++--- src/pipecat/services/fish/tts.py | 4 +-- .../services/gemini_multimodal_live/gemini.py | 4 +-- src/pipecat/services/llm_service.py | 6 ++-- src/pipecat/services/lmnt/tts.py | 4 +-- src/pipecat/services/neuphonic/tts.py | 4 +-- .../services/openai_realtime/openai.py | 6 ++-- .../services/openai_realtime_beta/openai.py | 6 ++-- src/pipecat/services/playht/tts.py | 4 +-- src/pipecat/services/rime/tts.py | 7 ++--- src/pipecat/services/sarvam/tts.py | 4 +-- src/pipecat/services/simli/video.py | 4 +-- src/pipecat/services/tavus/video.py | 4 +-- src/pipecat/services/tts_service.py | 18 +++++------ src/pipecat/transports/base_input.py | 6 ++-- src/pipecat/transports/base_output.py | 8 ++--- src/pipecat/transports/tavus/transport.py | 4 +-- 
src/pipecat/transports/websocket/fastapi.py | 4 +-- src/pipecat/transports/websocket/server.py | 4 +-- tests/test_context_aggregators.py | 6 ++-- tests/test_llm_response.py | 6 ++-- tests/test_transcript_processor.py | 6 ++-- 42 files changed, 135 insertions(+), 108 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index da1f08eaf..e912e5239 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Deprecated +- `StartInterruptionFrame` is now deprecated, use `InterruptionFrame` instead. + - Deprecate `VisionImageFrameAggregator` because `VisionImageRawFrame` has been removed. See the `12*` examples for the new recommended replacement pattern. diff --git a/examples/foundational/07c-interruptible-deepgram-vad.py b/examples/foundational/07c-interruptible-deepgram-vad.py index 3569fc440..3f9330bbd 100644 --- a/examples/foundational/07c-interruptible-deepgram-vad.py +++ b/examples/foundational/07c-interruptible-deepgram-vad.py @@ -12,8 +12,8 @@ from dotenv import load_dotenv from loguru import logger from pipecat.frames.frames import ( + InterruptionFrame, LLMRunFrame, - StartInterruptionFrame, UserStartedSpeakingFrame, UserStoppedSpeakingFrame, ) @@ -97,7 +97,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): @stt.event_handler("on_speech_started") async def on_speech_started(stt, *args, **kwargs): - await task.queue_frames([StartInterruptionFrame(), UserStartedSpeakingFrame()]) + await task.queue_frames([InterruptionFrame(), UserStartedSpeakingFrame()]) @stt.event_handler("on_utterance_end") async def on_utterance_end(stt, *args, **kwargs): diff --git a/examples/foundational/07s-interruptible-google-audio-in.py b/examples/foundational/07s-interruptible-google-audio-in.py index d7aaf07bd..0265c969f 100644 --- a/examples/foundational/07s-interruptible-google-audio-in.py +++ b/examples/foundational/07s-interruptible-google-audio-in.py @@ -16,10 
+16,10 @@ from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.frames.frames import ( Frame, InputAudioRawFrame, + InterruptionFrame, LLMFullResponseEndFrame, LLMFullResponseStartFrame, LLMRunFrame, - StartInterruptionFrame, TextFrame, TranscriptionFrame, UserStartedSpeakingFrame, @@ -181,9 +181,7 @@ class TranscriptionContextFixup(FrameProcessor): if isinstance(frame, MagicDemoTranscriptionFrame): self._transcript = frame.text - elif isinstance(frame, LLMFullResponseEndFrame) or isinstance( - frame, StartInterruptionFrame - ): + elif isinstance(frame, LLMFullResponseEndFrame) or isinstance(frame, InterruptionFrame): self.swap_user_audio() self.add_transcript_back_to_inference_output() self._transcript = "" diff --git a/examples/foundational/22b-natural-conversation-proposal.py b/examples/foundational/22b-natural-conversation-proposal.py index dc70d0379..417aeca76 100644 --- a/examples/foundational/22b-natural-conversation-proposal.py +++ b/examples/foundational/22b-natural-conversation-proposal.py @@ -18,9 +18,9 @@ from pipecat.frames.frames import ( Frame, FunctionCallInProgressFrame, FunctionCallResultFrame, + InterruptionFrame, LLMRunFrame, StartFrame, - StartInterruptionFrame, SystemFrame, TextFrame, TranscriptionFrame, @@ -144,7 +144,7 @@ class OutputGate(FrameProcessor): await self._start() if isinstance(frame, (EndFrame, CancelFrame)): await self._stop() - if isinstance(frame, StartInterruptionFrame): + if isinstance(frame, InterruptionFrame): self._frames_buffer = [] self.close_gate() await self.push_frame(frame, direction) @@ -232,7 +232,7 @@ class TurnDetectionLLM(Pipeline): async def pass_only_llm_trigger_frames(frame): return ( isinstance(frame, OpenAILLMContextFrame) - or isinstance(frame, StartInterruptionFrame) + or isinstance(frame, InterruptionFrame) or isinstance(frame, FunctionCallInProgressFrame) or isinstance(frame, FunctionCallResultFrame) ) diff --git a/examples/foundational/22c-natural-conversation-mixed-llms.py 
b/examples/foundational/22c-natural-conversation-mixed-llms.py index 44f3f1349..e4c554b26 100644 --- a/examples/foundational/22c-natural-conversation-mixed-llms.py +++ b/examples/foundational/22c-natural-conversation-mixed-llms.py @@ -18,9 +18,9 @@ from pipecat.frames.frames import ( Frame, FunctionCallInProgressFrame, FunctionCallResultFrame, + InterruptionFrame, LLMRunFrame, StartFrame, - StartInterruptionFrame, SystemFrame, TextFrame, TranscriptionFrame, @@ -347,7 +347,7 @@ class OutputGate(FrameProcessor): await self._start() if isinstance(frame, (EndFrame, CancelFrame)): await self._stop() - if isinstance(frame, StartInterruptionFrame): + if isinstance(frame, InterruptionFrame): self._frames_buffer = [] self.close_gate() await self.push_frame(frame, direction) @@ -426,7 +426,7 @@ class TurnDetectionLLM(Pipeline): async def pass_only_llm_trigger_frames(frame): return ( isinstance(frame, OpenAILLMContextFrame) - or isinstance(frame, StartInterruptionFrame) + or isinstance(frame, InterruptionFrame) or isinstance(frame, FunctionCallInProgressFrame) or isinstance(frame, FunctionCallResultFrame) ) diff --git a/examples/foundational/22d-natural-conversation-gemini-audio.py b/examples/foundational/22d-natural-conversation-gemini-audio.py index 5eed49092..d7ecf2ba7 100644 --- a/examples/foundational/22d-natural-conversation-gemini-audio.py +++ b/examples/foundational/22d-natural-conversation-gemini-audio.py @@ -20,10 +20,10 @@ from pipecat.frames.frames import ( FunctionCallInProgressFrame, FunctionCallResultFrame, InputAudioRawFrame, + InterruptionFrame, LLMFullResponseStartFrame, LLMRunFrame, StartFrame, - StartInterruptionFrame, SystemFrame, TextFrame, TranscriptionFrame, @@ -570,7 +570,7 @@ class OutputGate(FrameProcessor): await self._start() if isinstance(frame, (EndFrame, CancelFrame)): await self._stop() - if isinstance(frame, StartInterruptionFrame): + if isinstance(frame, InterruptionFrame): self._frames_buffer = [] self.close_gate() await 
self.push_frame(frame, direction) diff --git a/examples/foundational/30-observer.py b/examples/foundational/30-observer.py index 34557ca56..62efd0d3e 100644 --- a/examples/foundational/30-observer.py +++ b/examples/foundational/30-observer.py @@ -15,8 +15,8 @@ from pipecat.frames.frames import ( BotStartedSpeakingFrame, BotStoppedSpeakingFrame, EndFrame, + InterruptionFrame, LLMRunFrame, - StartInterruptionFrame, TTSTextFrame, UserStartedSpeakingFrame, ) @@ -69,7 +69,7 @@ class CustomObserver(BaseObserver): # Create direction arrow arrow = "→" if direction == FrameDirection.DOWNSTREAM else "←" - if isinstance(frame, StartInterruptionFrame) and isinstance(src, BaseOutputTransport): + if isinstance(frame, InterruptionFrame) and isinstance(src, BaseOutputTransport): logger.info(f"⚡ INTERRUPTION START: {src} {arrow} {dst} at {time_sec:.2f}s") elif isinstance(frame, BotStartedSpeakingFrame): logger.info(f"🤖 BOT START SPEAKING: {src} {arrow} {dst} at {time_sec:.2f}s") diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index 288048de0..309fa153e 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -820,7 +820,7 @@ class FrameProcessorResumeUrgentFrame(SystemFrame): @dataclass -class StartInterruptionFrame(SystemFrame): +class InterruptionFrame(SystemFrame): """Frame indicating user started speaking (interruption detected). Emitted by the BaseInputTransport to indicate that a user has started @@ -832,6 +832,34 @@ class StartInterruptionFrame(SystemFrame): pass +@dataclass +class StartInterruptionFrame(InterruptionFrame): + """Frame indicating user started speaking (interruption detected). + + .. deprecated:: 0.0.85 + This frame is deprecated and will be removed in a future version. + Instead, use `InterruptionFrame`. + + Emitted by the BaseInputTransport to indicate that a user has started + speaking (i.e. is interrupting). 
This is similar to + UserStartedSpeakingFrame except that it should be pushed concurrently + with other frames (so the order is not guaranteed). + """ + + def __post_init__(self): + super().__post_init__() + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "StartInterruptionFrame is deprecated and will be removed in a future version. " + "Instead, use InterruptionFrame.", + DeprecationWarning, + stacklevel=2, + ) + + @dataclass class UserStartedSpeakingFrame(SystemFrame): """Frame indicating user has started speaking. diff --git a/src/pipecat/processors/aggregators/llm_response.py b/src/pipecat/processors/aggregators/llm_response.py index d058a4334..1f9bb9b71 100644 --- a/src/pipecat/processors/aggregators/llm_response.py +++ b/src/pipecat/processors/aggregators/llm_response.py @@ -36,6 +36,7 @@ from pipecat.frames.frames import ( FunctionCallsStartedFrame, InputAudioRawFrame, InterimTranscriptionFrame, + InterruptionFrame, LLMFullResponseEndFrame, LLMFullResponseStartFrame, LLMMessagesAppendFrame, @@ -48,7 +49,6 @@ from pipecat.frames.frames import ( OpenAILLMContextAssistantTimestampFrame, SpeechControlParamsFrame, StartFrame, - StartInterruptionFrame, TextFrame, TranscriptionFrame, UserImageRawFrame, @@ -138,7 +138,7 @@ class LLMFullResponseAggregator(FrameProcessor): """ await super().process_frame(frame, direction) - if isinstance(frame, StartInterruptionFrame): + if isinstance(frame, InterruptionFrame): await self._call_event_handler("on_completion", self._aggregation, False) self._aggregation = "" self._started = False @@ -838,7 +838,7 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator): """ await super().process_frame(frame, direction) - if isinstance(frame, StartInterruptionFrame): + if isinstance(frame, InterruptionFrame): await self._handle_interruptions(frame) await self.push_frame(frame, direction) elif isinstance(frame, LLMFullResponseStartFrame): @@ -904,7 +904,7 @@ class 
LLMAssistantContextAggregator(LLMContextResponseAggregator): if frame.run_llm: await self.push_context_frame(FrameDirection.UPSTREAM) - async def _handle_interruptions(self, frame: StartInterruptionFrame): + async def _handle_interruptions(self, frame: InterruptionFrame): await self.push_aggregation() self._started = 0 await self.reset() diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index bda761ebd..0e14e7f10 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -37,6 +37,7 @@ from pipecat.frames.frames import ( FunctionCallsStartedFrame, InputAudioRawFrame, InterimTranscriptionFrame, + InterruptionFrame, LLMContextAssistantTimestampFrame, LLMContextFrame, LLMFullResponseEndFrame, @@ -48,7 +49,6 @@ from pipecat.frames.frames import ( LLMSetToolsFrame, SpeechControlParamsFrame, StartFrame, - StartInterruptionFrame, TextFrame, TranscriptionFrame, UserImageRawFrame, @@ -579,7 +579,7 @@ class LLMAssistantAggregator(LLMContextAggregator): """ await super().process_frame(frame, direction) - if isinstance(frame, StartInterruptionFrame): + if isinstance(frame, InterruptionFrame): await self._handle_interruptions(frame) await self.push_frame(frame, direction) elif isinstance(frame, LLMFullResponseStartFrame): @@ -645,7 +645,7 @@ class LLMAssistantAggregator(LLMContextAggregator): if frame.run_llm: await self.push_context_frame(FrameDirection.UPSTREAM) - async def _handle_interruptions(self, frame: StartInterruptionFrame): + async def _handle_interruptions(self, frame: InterruptionFrame): await self._push_aggregation() self._started = 0 await self.reset() diff --git a/src/pipecat/processors/filters/stt_mute_filter.py b/src/pipecat/processors/filters/stt_mute_filter.py index d6baac1f7..613d1ef51 100644 --- a/src/pipecat/processors/filters/stt_mute_filter.py +++ 
b/src/pipecat/processors/filters/stt_mute_filter.py @@ -25,8 +25,8 @@ from pipecat.frames.frames import ( FunctionCallResultFrame, InputAudioRawFrame, InterimTranscriptionFrame, + InterruptionFrame, StartFrame, - StartInterruptionFrame, STTMuteFrame, TranscriptionFrame, UserStartedSpeakingFrame, @@ -204,7 +204,7 @@ class STTMuteFilter(FrameProcessor): if isinstance( frame, ( - StartInterruptionFrame, + InterruptionFrame, VADUserStartedSpeakingFrame, VADUserStoppedSpeakingFrame, UserStartedSpeakingFrame, diff --git a/src/pipecat/processors/frame_processor.py b/src/pipecat/processors/frame_processor.py index cac1a0806..0b30204d4 100644 --- a/src/pipecat/processors/frame_processor.py +++ b/src/pipecat/processors/frame_processor.py @@ -28,8 +28,8 @@ from pipecat.frames.frames import ( FrameProcessorPauseUrgentFrame, FrameProcessorResumeFrame, FrameProcessorResumeUrgentFrame, + InterruptionFrame, StartFrame, - StartInterruptionFrame, SystemFrame, ) from pipecat.metrics.metrics import LLMTokenUsage, MetricsData @@ -588,7 +588,7 @@ class FrameProcessor(BaseObject): if isinstance(frame, StartFrame): await self.__start(frame) - elif isinstance(frame, StartInterruptionFrame): + elif isinstance(frame, InterruptionFrame): await self._start_interruption() await self.stop_all_metrics() elif isinstance(frame, CancelFrame): diff --git a/src/pipecat/processors/transcript_processor.py b/src/pipecat/processors/transcript_processor.py index 424374faf..9a57aad35 100644 --- a/src/pipecat/processors/transcript_processor.py +++ b/src/pipecat/processors/transcript_processor.py @@ -19,7 +19,7 @@ from pipecat.frames.frames import ( CancelFrame, EndFrame, Frame, - StartInterruptionFrame, + InterruptionFrame, TranscriptionFrame, TranscriptionMessage, TranscriptionUpdateFrame, @@ -195,7 +195,7 @@ class AssistantTranscriptProcessor(BaseTranscriptProcessor): """ await super().process_frame(frame, direction) - if isinstance(frame, (StartInterruptionFrame, CancelFrame)): + if isinstance(frame, 
(InterruptionFrame, CancelFrame)): # Push frame first otherwise our emitted transcription update frame # might get cleaned up. await self.push_frame(frame, direction) diff --git a/src/pipecat/serializers/exotel.py b/src/pipecat/serializers/exotel.py index 9ed342631..1a5859211 100644 --- a/src/pipecat/serializers/exotel.py +++ b/src/pipecat/serializers/exotel.py @@ -20,8 +20,8 @@ from pipecat.frames.frames import ( Frame, InputAudioRawFrame, InputDTMFFrame, + InterruptionFrame, StartFrame, - StartInterruptionFrame, TransportMessageFrame, TransportMessageUrgentFrame, ) @@ -98,7 +98,7 @@ class ExotelFrameSerializer(FrameSerializer): Returns: Serialized data as string or bytes, or None if the frame isn't handled. """ - if isinstance(frame, StartInterruptionFrame): + if isinstance(frame, InterruptionFrame): answer = {"event": "clear", "streamSid": self._stream_sid} return json.dumps(answer) elif isinstance(frame, AudioRawFrame): diff --git a/src/pipecat/serializers/plivo.py b/src/pipecat/serializers/plivo.py index aa8b4b27e..519e0893a 100644 --- a/src/pipecat/serializers/plivo.py +++ b/src/pipecat/serializers/plivo.py @@ -22,8 +22,8 @@ from pipecat.frames.frames import ( Frame, InputAudioRawFrame, InputDTMFFrame, + InterruptionFrame, StartFrame, - StartInterruptionFrame, TransportMessageFrame, TransportMessageUrgentFrame, ) @@ -122,7 +122,7 @@ class PlivoFrameSerializer(FrameSerializer): self._hangup_attempted = True await self._hang_up_call() return None - elif isinstance(frame, StartInterruptionFrame): + elif isinstance(frame, InterruptionFrame): answer = {"event": "clearAudio", "streamId": self._stream_id} return json.dumps(answer) elif isinstance(frame, AudioRawFrame): diff --git a/src/pipecat/serializers/telnyx.py b/src/pipecat/serializers/telnyx.py index 467c01ba2..7603c9bf7 100644 --- a/src/pipecat/serializers/telnyx.py +++ b/src/pipecat/serializers/telnyx.py @@ -29,8 +29,8 @@ from pipecat.frames.frames import ( Frame, InputAudioRawFrame, InputDTMFFrame, + 
InterruptionFrame, StartFrame, - StartInterruptionFrame, ) from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType @@ -137,7 +137,7 @@ class TelnyxFrameSerializer(FrameSerializer): self._hangup_attempted = True await self._hang_up_call() return None - elif isinstance(frame, StartInterruptionFrame): + elif isinstance(frame, InterruptionFrame): answer = {"event": "clear"} return json.dumps(answer) elif isinstance(frame, AudioRawFrame): diff --git a/src/pipecat/serializers/twilio.py b/src/pipecat/serializers/twilio.py index 57e7c8dba..f83787468 100644 --- a/src/pipecat/serializers/twilio.py +++ b/src/pipecat/serializers/twilio.py @@ -22,8 +22,8 @@ from pipecat.frames.frames import ( Frame, InputAudioRawFrame, InputDTMFFrame, + InterruptionFrame, StartFrame, - StartInterruptionFrame, TransportMessageFrame, TransportMessageUrgentFrame, ) @@ -122,7 +122,7 @@ class TwilioFrameSerializer(FrameSerializer): self._hangup_attempted = True await self._hang_up_call() return None - elif isinstance(frame, StartInterruptionFrame): + elif isinstance(frame, InterruptionFrame): answer = {"event": "clear", "streamSid": self._stream_sid} return json.dumps(answer) elif isinstance(frame, AudioRawFrame): diff --git a/src/pipecat/services/asyncai/tts.py b/src/pipecat/services/asyncai/tts.py index d536263bd..a453d6820 100644 --- a/src/pipecat/services/asyncai/tts.py +++ b/src/pipecat/services/asyncai/tts.py @@ -20,8 +20,8 @@ from pipecat.frames.frames import ( EndFrame, ErrorFrame, Frame, + InterruptionFrame, StartFrame, - StartInterruptionFrame, TTSAudioRawFrame, TTSStartedFrame, TTSStoppedFrame, @@ -275,7 +275,7 @@ class AsyncAITTSService(InterruptibleTTSService): direction: The direction to push the frame. 
""" await super().push_frame(frame, direction) - if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)): + if isinstance(frame, (TTSStoppedFrame, InterruptionFrame)): self._started = False async def _receive_messages(self): diff --git a/src/pipecat/services/aws_nova_sonic/context.py b/src/pipecat/services/aws_nova_sonic/context.py index e23a18362..0ce5ce033 100644 --- a/src/pipecat/services/aws_nova_sonic/context.py +++ b/src/pipecat/services/aws_nova_sonic/context.py @@ -21,13 +21,13 @@ from pipecat.frames.frames import ( DataFrame, Frame, FunctionCallResultFrame, + InterruptionFrame, LLMFullResponseEndFrame, LLMFullResponseStartFrame, LLMMessagesAppendFrame, LLMMessagesUpdateFrame, LLMSetToolChoiceFrame, LLMSetToolsFrame, - StartInterruptionFrame, TextFrame, UserImageRawFrame, ) @@ -306,7 +306,7 @@ class AWSNovaSonicAssistantContextAggregator(OpenAIAssistantContextAggregator): if isinstance( frame, ( - StartInterruptionFrame, + InterruptionFrame, LLMFullResponseStartFrame, LLMFullResponseEndFrame, TextFrame, diff --git a/src/pipecat/services/cartesia/tts.py b/src/pipecat/services/cartesia/tts.py index 5efda600c..3b81da5d4 100644 --- a/src/pipecat/services/cartesia/tts.py +++ b/src/pipecat/services/cartesia/tts.py @@ -20,8 +20,8 @@ from pipecat.frames.frames import ( EndFrame, ErrorFrame, Frame, + InterruptionFrame, StartFrame, - StartInterruptionFrame, TTSAudioRawFrame, TTSStartedFrame, TTSStoppedFrame, @@ -371,7 +371,7 @@ class CartesiaTTSService(AudioContextWordTTSService): return self._websocket raise Exception("Websocket not connected") - async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection): + async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): await super()._handle_interruption(frame, direction) await self.stop_all_metrics() if self._context_id: diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py index 8551fc5de..ab37a9add 100644 
--- a/src/pipecat/services/elevenlabs/tts.py +++ b/src/pipecat/services/elevenlabs/tts.py @@ -25,9 +25,9 @@ from pipecat.frames.frames import ( EndFrame, ErrorFrame, Frame, + InterruptionFrame, LLMFullResponseEndFrame, StartFrame, - StartInterruptionFrame, TTSAudioRawFrame, TTSStartedFrame, TTSStoppedFrame, @@ -460,7 +460,7 @@ class ElevenLabsTTSService(AudioContextWordTTSService): direction: The direction to push the frame. """ await super().push_frame(frame, direction) - if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)): + if isinstance(frame, (TTSStoppedFrame, InterruptionFrame)): self._started = False if isinstance(frame, TTSStoppedFrame): await self.add_word_timestamps([("Reset", 0)]) @@ -549,7 +549,7 @@ class ElevenLabsTTSService(AudioContextWordTTSService): return self._websocket raise Exception("Websocket not connected") - async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection): + async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): """Handle interruption by closing the current context.""" await super()._handle_interruption(frame, direction) @@ -856,7 +856,7 @@ class ElevenLabsHttpTTSService(WordTTSService): direction: The direction to push the frame. 
""" await super().push_frame(frame, direction) - if isinstance(frame, (StartInterruptionFrame, TTSStoppedFrame)): + if isinstance(frame, (InterruptionFrame, TTSStoppedFrame)): # Reset timing on interruption or stop self._reset_state() diff --git a/src/pipecat/services/fish/tts.py b/src/pipecat/services/fish/tts.py index 305c14884..b39b775e5 100644 --- a/src/pipecat/services/fish/tts.py +++ b/src/pipecat/services/fish/tts.py @@ -21,8 +21,8 @@ from pipecat.frames.frames import ( EndFrame, ErrorFrame, Frame, + InterruptionFrame, StartFrame, - StartInterruptionFrame, TTSAudioRawFrame, TTSStartedFrame, TTSStoppedFrame, @@ -259,7 +259,7 @@ class FishAudioTTSService(InterruptibleTTSService): return self._websocket raise Exception("Websocket not connected") - async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection): + async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): await super()._handle_interruption(frame, direction) await self.stop_all_metrics() self._request_id = None diff --git a/src/pipecat/services/gemini_multimodal_live/gemini.py b/src/pipecat/services/gemini_multimodal_live/gemini.py index 106f668e9..df560358f 100644 --- a/src/pipecat/services/gemini_multimodal_live/gemini.py +++ b/src/pipecat/services/gemini_multimodal_live/gemini.py @@ -33,6 +33,7 @@ from pipecat.frames.frames import ( InputAudioRawFrame, InputImageRawFrame, InputTextRawFrame, + InterruptionFrame, LLMContextFrame, LLMFullResponseEndFrame, LLMFullResponseStartFrame, @@ -41,7 +42,6 @@ from pipecat.frames.frames import ( LLMTextFrame, LLMUpdateSettingsFrame, StartFrame, - StartInterruptionFrame, TranscriptionFrame, TTSAudioRawFrame, TTSStartedFrame, @@ -752,7 +752,7 @@ class GeminiMultimodalLiveLLMService(LLMService): elif isinstance(frame, InputImageRawFrame): await self._send_user_video(frame) await self.push_frame(frame, direction) - elif isinstance(frame, StartInterruptionFrame): + elif isinstance(frame, 
InterruptionFrame): await self._handle_interruption() await self.push_frame(frame, direction) elif isinstance(frame, UserStartedSpeakingFrame): diff --git a/src/pipecat/services/llm_service.py b/src/pipecat/services/llm_service.py index 502570a83..e309609fe 100644 --- a/src/pipecat/services/llm_service.py +++ b/src/pipecat/services/llm_service.py @@ -36,12 +36,12 @@ from pipecat.frames.frames import ( FunctionCallResultFrame, FunctionCallResultProperties, FunctionCallsStartedFrame, + InterruptionFrame, LLMConfigureOutputFrame, LLMFullResponseEndFrame, LLMFullResponseStartFrame, LLMTextFrame, StartFrame, - StartInterruptionFrame, UserImageRequestFrame, ) from pipecat.processors.aggregators.llm_context import LLMContext @@ -269,7 +269,7 @@ class LLMService(AIService): """ await super().process_frame(frame, direction) - if isinstance(frame, StartInterruptionFrame): + if isinstance(frame, InterruptionFrame): await self._handle_interruptions(frame) elif isinstance(frame, LLMConfigureOutputFrame): self._skip_tts = frame.skip_tts @@ -286,7 +286,7 @@ class LLMService(AIService): await super().push_frame(frame, direction) - async def _handle_interruptions(self, _: StartInterruptionFrame): + async def _handle_interruptions(self, _: InterruptionFrame): for function_name, entry in self._functions.items(): if entry.cancel_on_interruption: await self._cancel_function_call(function_name) diff --git a/src/pipecat/services/lmnt/tts.py b/src/pipecat/services/lmnt/tts.py index 187ef6b84..a602789fd 100644 --- a/src/pipecat/services/lmnt/tts.py +++ b/src/pipecat/services/lmnt/tts.py @@ -16,8 +16,8 @@ from pipecat.frames.frames import ( EndFrame, ErrorFrame, Frame, + InterruptionFrame, StartFrame, - StartInterruptionFrame, TTSAudioRawFrame, TTSStartedFrame, TTSStoppedFrame, @@ -180,7 +180,7 @@ class LmntTTSService(InterruptibleTTSService): direction: The direction to push the frame. 
""" await super().push_frame(frame, direction) - if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)): + if isinstance(frame, (TTSStoppedFrame, InterruptionFrame)): self._started = False async def _connect(self): diff --git a/src/pipecat/services/neuphonic/tts.py b/src/pipecat/services/neuphonic/tts.py index f4777b8d9..46d805086 100644 --- a/src/pipecat/services/neuphonic/tts.py +++ b/src/pipecat/services/neuphonic/tts.py @@ -25,9 +25,9 @@ from pipecat.frames.frames import ( EndFrame, ErrorFrame, Frame, + InterruptionFrame, LLMFullResponseEndFrame, StartFrame, - StartInterruptionFrame, TTSAudioRawFrame, TTSSpeakFrame, TTSStartedFrame, @@ -224,7 +224,7 @@ class NeuphonicTTSService(InterruptibleTTSService): direction: The direction to push the frame. """ await super().push_frame(frame, direction) - if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)): + if isinstance(frame, (TTSStoppedFrame, InterruptionFrame)): self._started = False async def process_frame(self, frame: Frame, direction: FrameDirection): diff --git a/src/pipecat/services/openai_realtime/openai.py b/src/pipecat/services/openai_realtime/openai.py index 67e3ac9e6..67fd18308 100644 --- a/src/pipecat/services/openai_realtime/openai.py +++ b/src/pipecat/services/openai_realtime/openai.py @@ -23,6 +23,7 @@ from pipecat.frames.frames import ( Frame, InputAudioRawFrame, InterimTranscriptionFrame, + InterruptionFrame, LLMContextFrame, LLMFullResponseEndFrame, LLMFullResponseStartFrame, @@ -31,7 +32,6 @@ from pipecat.frames.frames import ( LLMTextFrame, LLMUpdateSettingsFrame, StartFrame, - StartInterruptionFrame, TranscriptionFrame, TTSAudioRawFrame, TTSStartedFrame, @@ -366,7 +366,7 @@ class OpenAIRealtimeLLMService(LLMService): elif isinstance(frame, InputAudioRawFrame): if not self._audio_input_paused: await self._send_user_audio(frame) - elif isinstance(frame, StartInterruptionFrame): + elif isinstance(frame, InterruptionFrame): await self._handle_interruption() elif isinstance(frame, 
UserStartedSpeakingFrame): await self._handle_user_started_speaking(frame) @@ -717,7 +717,7 @@ class OpenAIRealtimeLLMService(LLMService): async def _handle_evt_speech_started(self, evt): await self._truncate_current_audio_response() await self._start_interruption() # cancels this processor task - await self.push_frame(StartInterruptionFrame()) # cancels downstream tasks + await self.push_frame(InterruptionFrame()) # cancels downstream tasks await self.push_frame(UserStartedSpeakingFrame()) async def _handle_evt_speech_stopped(self, evt): diff --git a/src/pipecat/services/openai_realtime_beta/openai.py b/src/pipecat/services/openai_realtime_beta/openai.py index ef0ea92d6..84bcd039b 100644 --- a/src/pipecat/services/openai_realtime_beta/openai.py +++ b/src/pipecat/services/openai_realtime_beta/openai.py @@ -24,6 +24,7 @@ from pipecat.frames.frames import ( Frame, InputAudioRawFrame, InterimTranscriptionFrame, + InterruptionFrame, LLMContextFrame, LLMFullResponseEndFrame, LLMFullResponseStartFrame, @@ -32,7 +33,6 @@ from pipecat.frames.frames import ( LLMTextFrame, LLMUpdateSettingsFrame, StartFrame, - StartInterruptionFrame, TranscriptionFrame, TTSAudioRawFrame, TTSStartedFrame, @@ -364,7 +364,7 @@ class OpenAIRealtimeBetaLLMService(LLMService): elif isinstance(frame, InputAudioRawFrame): if not self._audio_input_paused: await self._send_user_audio(frame) - elif isinstance(frame, StartInterruptionFrame): + elif isinstance(frame, InterruptionFrame): await self._handle_interruption() elif isinstance(frame, UserStartedSpeakingFrame): await self._handle_user_started_speaking(frame) @@ -659,7 +659,7 @@ class OpenAIRealtimeBetaLLMService(LLMService): async def _handle_evt_speech_started(self, evt): await self._truncate_current_audio_response() await self._start_interruption() # cancels this processor task - await self.push_frame(StartInterruptionFrame()) # cancels downstream tasks + await self.push_frame(InterruptionFrame()) # cancels downstream tasks await 
self.push_frame(UserStartedSpeakingFrame()) async def _handle_evt_speech_stopped(self, evt): diff --git a/src/pipecat/services/playht/tts.py b/src/pipecat/services/playht/tts.py index aa92df055..0f23b7b5e 100644 --- a/src/pipecat/services/playht/tts.py +++ b/src/pipecat/services/playht/tts.py @@ -25,8 +25,8 @@ from pipecat.frames.frames import ( EndFrame, ErrorFrame, Frame, + InterruptionFrame, StartFrame, - StartInterruptionFrame, TTSAudioRawFrame, TTSStartedFrame, TTSStoppedFrame, @@ -312,7 +312,7 @@ class PlayHTTTSService(InterruptibleTTSService): return self._websocket raise Exception("Websocket not connected") - async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection): + async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): """Handle interruption by stopping metrics and clearing request ID.""" await super()._handle_interruption(frame, direction) await self.stop_all_metrics() diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index be979b7f9..917716545 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -24,15 +24,14 @@ from pipecat.frames.frames import ( EndFrame, ErrorFrame, Frame, + InterruptionFrame, StartFrame, - StartInterruptionFrame, TTSAudioRawFrame, TTSStartedFrame, TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.tts_service import AudioContextWordTTSService, TTSService -from pipecat.transcriptions import language from pipecat.transcriptions.language import Language from pipecat.utils.text.base_text_aggregator import BaseTextAggregator from pipecat.utils.text.skip_tags_aggregator import SkipTagsAggregator @@ -280,7 +279,7 @@ class RimeTTSService(AudioContextWordTTSService): return self._websocket raise Exception("Websocket not connected") - async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection): + async def 
_handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): """Handle interruption by clearing current context.""" await super()._handle_interruption(frame, direction) await self.stop_all_metrics() @@ -375,7 +374,7 @@ class RimeTTSService(AudioContextWordTTSService): direction: The direction to push the frame. """ await super().push_frame(frame, direction) - if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)): + if isinstance(frame, (TTSStoppedFrame, InterruptionFrame)): if isinstance(frame, TTSStoppedFrame): await self.add_word_timestamps([("Reset", 0)]) diff --git a/src/pipecat/services/sarvam/tts.py b/src/pipecat/services/sarvam/tts.py index 01702fd04..a9fedcc58 100644 --- a/src/pipecat/services/sarvam/tts.py +++ b/src/pipecat/services/sarvam/tts.py @@ -20,9 +20,9 @@ from pipecat.frames.frames import ( EndFrame, ErrorFrame, Frame, + InterruptionFrame, LLMFullResponseEndFrame, StartFrame, - StartInterruptionFrame, TTSAudioRawFrame, TTSStartedFrame, TTSStoppedFrame, @@ -455,7 +455,7 @@ class SarvamTTSService(InterruptibleTTSService): direction: The direction to push the frame. 
""" await super().push_frame(frame, direction) - if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)): + if isinstance(frame, (TTSStoppedFrame, InterruptionFrame)): self._started = False async def process_frame(self, frame: Frame, direction: FrameDirection): diff --git a/src/pipecat/services/simli/video.py b/src/pipecat/services/simli/video.py index e35dad4c6..d48a744e0 100644 --- a/src/pipecat/services/simli/video.py +++ b/src/pipecat/services/simli/video.py @@ -15,8 +15,8 @@ from pipecat.frames.frames import ( CancelFrame, EndFrame, Frame, + InterruptionFrame, OutputImageRawFrame, - StartInterruptionFrame, TTSAudioRawFrame, TTSStoppedFrame, UserStartedSpeakingFrame, @@ -179,7 +179,7 @@ class SimliVideoService(FrameProcessor): return elif isinstance(frame, (EndFrame, CancelFrame)): await self._stop() - elif isinstance(frame, (StartInterruptionFrame, UserStartedSpeakingFrame)): + elif isinstance(frame, (InterruptionFrame, UserStartedSpeakingFrame)): if not self._previously_interrupted: await self._simli_client.clearBuffer() self._previously_interrupted = self._is_trinity_avatar diff --git a/src/pipecat/services/tavus/video.py b/src/pipecat/services/tavus/video.py index aff5778d9..68309a5c9 100644 --- a/src/pipecat/services/tavus/video.py +++ b/src/pipecat/services/tavus/video.py @@ -23,12 +23,12 @@ from pipecat.frames.frames import ( CancelFrame, EndFrame, Frame, + InterruptionFrame, OutputAudioRawFrame, OutputImageRawFrame, OutputTransportReadyFrame, SpeechOutputAudioRawFrame, StartFrame, - StartInterruptionFrame, TTSAudioRawFrame, TTSStartedFrame, ) @@ -222,7 +222,7 @@ class TavusVideoService(AIService): """ await super().process_frame(frame, direction) - if isinstance(frame, StartInterruptionFrame): + if isinstance(frame, InterruptionFrame): await self._handle_interruptions() await self.push_frame(frame, direction) elif isinstance(frame, TTSAudioRawFrame): diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index 
93800338b..02b80b609 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -20,10 +20,10 @@ from pipecat.frames.frames import ( ErrorFrame, Frame, InterimTranscriptionFrame, + InterruptionFrame, LLMFullResponseEndFrame, LLMFullResponseStartFrame, StartFrame, - StartInterruptionFrame, TextFrame, TranscriptionFrame, TTSAudioRawFrame, @@ -309,7 +309,7 @@ class TTSService(AIService): and not isinstance(frame, TranscriptionFrame) ): await self._process_text_frame(frame) - elif isinstance(frame, StartInterruptionFrame): + elif isinstance(frame, InterruptionFrame): await self._handle_interruption(frame, direction) await self.push_frame(frame, direction) elif isinstance(frame, (LLMFullResponseEndFrame, EndFrame)): @@ -367,14 +367,14 @@ class TTSService(AIService): await super().push_frame(frame, direction) if self._push_stop_frames and ( - isinstance(frame, StartInterruptionFrame) + isinstance(frame, InterruptionFrame) or isinstance(frame, TTSStartedFrame) or isinstance(frame, TTSAudioRawFrame) or isinstance(frame, TTSStoppedFrame) ): await self._stop_frame_queue.put(frame) - async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection): + async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): self._processing_text = False await self._text_aggregator.handle_interruption() for filter in self._text_filters: @@ -438,7 +438,7 @@ class TTSService(AIService): ) if isinstance(frame, TTSStartedFrame): has_started = True - elif isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)): + elif isinstance(frame, (TTSStoppedFrame, InterruptionFrame)): has_started = False except asyncio.TimeoutError: if has_started: @@ -523,7 +523,7 @@ class WordTTSService(TTSService): elif isinstance(frame, (LLMFullResponseEndFrame, EndFrame)): await self.flush_audio() - async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection): + async def 
_handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): await super()._handle_interruption(frame, direction) self._llm_response_started = False self.reset_word_timestamps() @@ -613,7 +613,7 @@ class InterruptibleTTSService(WebsocketTTSService): # user interrupts we need to reconnect. self._bot_speaking = False - async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection): + async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): await super()._handle_interruption(frame, direction) if self._bot_speaking: await self._disconnect() @@ -685,7 +685,7 @@ class InterruptibleWordTTSService(WebsocketWordTTSService): # user interrupts we need to reconnect. self._bot_speaking = False - async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection): + async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): await super()._handle_interruption(frame, direction) if self._bot_speaking: await self._disconnect() @@ -813,7 +813,7 @@ class AudioContextWordTTSService(WebsocketWordTTSService): await super().cancel(frame) await self._stop_audio_context_task() - async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection): + async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): await super()._handle_interruption(frame, direction) await self._stop_audio_context_task() self._create_audio_context_task() diff --git a/src/pipecat/transports/base_input.py b/src/pipecat/transports/base_input.py index cb3808bf9..228b8f6e7 100644 --- a/src/pipecat/transports/base_input.py +++ b/src/pipecat/transports/base_input.py @@ -33,10 +33,10 @@ from pipecat.frames.frames import ( Frame, InputAudioRawFrame, InputImageRawFrame, + InterruptionFrame, MetricsFrame, SpeechControlParamsFrame, StartFrame, - StartInterruptionFrame, StopFrame, SystemFrame, UserSpeakingFrame, @@ -340,7 +340,7 
@@ class BaseInputTransport(FrameProcessor): logger.debug("Bot interruption") if self.interruptions_allowed: await self._start_interruption() - await self.push_frame(StartInterruptionFrame()) + await self.push_frame(InterruptionFrame()) async def _handle_user_interruption(self, vad_state: VADState, emulated: bool = False): """Handle user interruption events based on speaking state.""" @@ -366,7 +366,7 @@ class BaseInputTransport(FrameProcessor): # Push an out-of-band frame (i.e. not using the ordered push # frame task) to stop everything, specially at the output # transport. - await self.push_frame(StartInterruptionFrame()) + await self.push_frame(InterruptionFrame()) elif self.interruption_strategies and self._bot_speaking: logger.debug( "User started speaking while bot is speaking with interruption config - " diff --git a/src/pipecat/transports/base_output.py b/src/pipecat/transports/base_output.py index 3830aa9c9..34c66571c 100644 --- a/src/pipecat/transports/base_output.py +++ b/src/pipecat/transports/base_output.py @@ -30,6 +30,7 @@ from pipecat.frames.frames import ( EndFrame, Frame, InputTransportMessageUrgentFrame, + InterruptionFrame, MixerControlFrame, OutputAudioRawFrame, OutputDTMFFrame, @@ -39,7 +40,6 @@ from pipecat.frames.frames import ( SpeechOutputAudioRawFrame, SpriteFrame, StartFrame, - StartInterruptionFrame, SystemFrame, TransportMessageFrame, TransportMessageUrgentFrame, @@ -299,7 +299,7 @@ class BaseOutputTransport(FrameProcessor): elif isinstance(frame, CancelFrame): await self.cancel(frame) await self.push_frame(frame, direction) - elif isinstance(frame, StartInterruptionFrame): + elif isinstance(frame, InterruptionFrame): await self.push_frame(frame, direction) await self._handle_frame(frame) elif isinstance(frame, TransportMessageUrgentFrame) and not isinstance( @@ -340,7 +340,7 @@ class BaseOutputTransport(FrameProcessor): sender = self._media_senders[frame.transport_destination] - if isinstance(frame, StartInterruptionFrame): + if 
isinstance(frame, InterruptionFrame): await sender.handle_interruptions(frame) elif isinstance(frame, OutputAudioRawFrame): await sender.handle_audio_frame(frame) @@ -491,7 +491,7 @@ class BaseOutputTransport(FrameProcessor): await self._cancel_clock_task() await self._cancel_video_task() - async def handle_interruptions(self, _: StartInterruptionFrame): + async def handle_interruptions(self, _: InterruptionFrame): """Handle interruption events by restarting tasks and clearing buffers. Args: diff --git a/src/pipecat/transports/tavus/transport.py b/src/pipecat/transports/tavus/transport.py index aea30f72f..23e14f97b 100644 --- a/src/pipecat/transports/tavus/transport.py +++ b/src/pipecat/transports/tavus/transport.py @@ -25,9 +25,9 @@ from pipecat.frames.frames import ( EndFrame, Frame, InputAudioRawFrame, + InterruptionFrame, OutputAudioRawFrame, StartFrame, - StartInterruptionFrame, TransportMessageFrame, TransportMessageUrgentFrame, ) @@ -618,7 +618,7 @@ class TavusOutputTransport(BaseOutputTransport): direction: The direction of frame flow in the pipeline. 
""" await super().process_frame(frame, direction) - if isinstance(frame, StartInterruptionFrame): + if isinstance(frame, InterruptionFrame): await self._handle_interruptions() async def _handle_interruptions(self): diff --git a/src/pipecat/transports/websocket/fastapi.py b/src/pipecat/transports/websocket/fastapi.py index 8287783c2..474f70de8 100644 --- a/src/pipecat/transports/websocket/fastapi.py +++ b/src/pipecat/transports/websocket/fastapi.py @@ -26,9 +26,9 @@ from pipecat.frames.frames import ( EndFrame, Frame, InputAudioRawFrame, + InterruptionFrame, OutputAudioRawFrame, StartFrame, - StartInterruptionFrame, TransportMessageFrame, TransportMessageUrgentFrame, ) @@ -398,7 +398,7 @@ class FastAPIWebsocketOutputTransport(BaseOutputTransport): """ await super().process_frame(frame, direction) - if isinstance(frame, StartInterruptionFrame): + if isinstance(frame, InterruptionFrame): await self._write_frame(frame) self._next_send_time = 0 diff --git a/src/pipecat/transports/websocket/server.py b/src/pipecat/transports/websocket/server.py index 8e73fb47e..67631ab04 100644 --- a/src/pipecat/transports/websocket/server.py +++ b/src/pipecat/transports/websocket/server.py @@ -25,9 +25,9 @@ from pipecat.frames.frames import ( EndFrame, Frame, InputAudioRawFrame, + InterruptionFrame, OutputAudioRawFrame, StartFrame, - StartInterruptionFrame, TransportMessageFrame, TransportMessageUrgentFrame, ) @@ -334,7 +334,7 @@ class WebsocketServerOutputTransport(BaseOutputTransport): """ await super().process_frame(frame, direction) - if isinstance(frame, StartInterruptionFrame): + if isinstance(frame, InterruptionFrame): await self._write_frame(frame) self._next_send_time = 0 diff --git a/tests/test_context_aggregators.py b/tests/test_context_aggregators.py index e04487f69..9c29c78e8 100644 --- a/tests/test_context_aggregators.py +++ b/tests/test_context_aggregators.py @@ -17,11 +17,11 @@ from pipecat.frames.frames import ( FunctionCallResultFrame, FunctionCallResultProperties, 
InterimTranscriptionFrame, + InterruptionFrame, LLMFullResponseEndFrame, LLMFullResponseStartFrame, OpenAILLMContextAssistantTimestampFrame, SpeechControlParamsFrame, - StartInterruptionFrame, TextFrame, TranscriptionFrame, UserStartedSpeakingFrame, @@ -618,7 +618,7 @@ class BaseTestAssistantContextAggreagator: TextFrame(text="Pipecat."), LLMFullResponseEndFrame(), SleepFrame(AGGREGATION_SLEEP), - StartInterruptionFrame(), + InterruptionFrame(), LLMFullResponseStartFrame(), TextFrame(text="How are "), TextFrame(text="you?"), @@ -626,7 +626,7 @@ class BaseTestAssistantContextAggreagator: ] expected_down_frames = [ *self.EXPECTED_CONTEXT_FRAMES, - StartInterruptionFrame, + InterruptionFrame, *self.EXPECTED_CONTEXT_FRAMES, ] await run_test( diff --git a/tests/test_llm_response.py b/tests/test_llm_response.py index 93838a658..663bd6671 100644 --- a/tests/test_llm_response.py +++ b/tests/test_llm_response.py @@ -7,10 +7,10 @@ import unittest from pipecat.frames.frames import ( + InterruptionFrame, LLMFullResponseEndFrame, LLMFullResponseStartFrame, LLMTextFrame, - StartInterruptionFrame, ) from pipecat.processors.aggregators.llm_response import LLMFullResponseAggregator from pipecat.tests.utils import SleepFrame, run_test @@ -113,7 +113,7 @@ class TestLLMFullResponseAggregator(unittest.IsolatedAsyncioTestCase): LLMFullResponseStartFrame(), LLMTextFrame("Hello "), SleepFrame(), - StartInterruptionFrame(), + InterruptionFrame(), LLMFullResponseStartFrame(), LLMTextFrame("Hello "), LLMTextFrame("there!"), @@ -122,7 +122,7 @@ class TestLLMFullResponseAggregator(unittest.IsolatedAsyncioTestCase): expected_down_frames = [ LLMFullResponseStartFrame, LLMTextFrame, - StartInterruptionFrame, + InterruptionFrame, LLMFullResponseStartFrame, LLMTextFrame, LLMTextFrame, diff --git a/tests/test_transcript_processor.py b/tests/test_transcript_processor.py index a2a057a13..b433951ce 100644 --- a/tests/test_transcript_processor.py +++ b/tests/test_transcript_processor.py @@ -14,7 +14,7 
@@ from pipecat.frames.frames import ( BotStartedSpeakingFrame, BotStoppedSpeakingFrame, CancelFrame, - StartInterruptionFrame, + InterruptionFrame, TranscriptionFrame, TranscriptionMessage, TranscriptionUpdateFrame, @@ -238,7 +238,7 @@ class TestUserTranscriptProcessor(unittest.IsolatedAsyncioTestCase): TTSTextFrame(text="Hello"), TTSTextFrame(text="world!"), SleepFrame(), - StartInterruptionFrame(), # User interrupts here + InterruptionFrame(), # User interrupts here SleepFrame(), BotStartedSpeakingFrame(), TTSTextFrame(text="New"), @@ -252,7 +252,7 @@ class TestUserTranscriptProcessor(unittest.IsolatedAsyncioTestCase): BotStartedSpeakingFrame, TTSTextFrame, # "Hello" TTSTextFrame, # "world!" - StartInterruptionFrame, + InterruptionFrame, TranscriptionUpdateFrame, # First message (emitted due to interruption) BotStartedSpeakingFrame, TTSTextFrame, # "New"