From 76c79a7dfae21b54a1a8e59c7ba7bc44d2008e9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Wed, 12 Nov 2025 15:24:54 -0800 Subject: [PATCH 01/30] introduce new user and bot turn start strategies --- src/pipecat/turns/__init__.py | 0 .../turns/bot/base_bot_turn_start_strategy.py | 74 +++++++++ .../transcription_bot_turn_start_strategy.py | 111 +++++++++++++ .../turn_analyzer_bot_turn_start_strategy.py | 152 ++++++++++++++++++ src/pipecat/turns/user/__init__.py | 0 .../user/base_user_turn_start_strategy.py | 73 +++++++++ .../min_words_user_turn_start_strategy.py | 91 +++++++++++ .../user/vad_user_turn_start_strategy.py | 30 ++++ 8 files changed, 531 insertions(+) create mode 100644 src/pipecat/turns/__init__.py create mode 100644 src/pipecat/turns/bot/base_bot_turn_start_strategy.py create mode 100644 src/pipecat/turns/bot/transcription_bot_turn_start_strategy.py create mode 100644 src/pipecat/turns/bot/turn_analyzer_bot_turn_start_strategy.py create mode 100644 src/pipecat/turns/user/__init__.py create mode 100644 src/pipecat/turns/user/base_user_turn_start_strategy.py create mode 100644 src/pipecat/turns/user/min_words_user_turn_start_strategy.py create mode 100644 src/pipecat/turns/user/vad_user_turn_start_strategy.py diff --git a/src/pipecat/turns/__init__.py b/src/pipecat/turns/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/pipecat/turns/bot/base_bot_turn_start_strategy.py b/src/pipecat/turns/bot/base_bot_turn_start_strategy.py new file mode 100644 index 000000000..bf804ba9c --- /dev/null +++ b/src/pipecat/turns/bot/base_bot_turn_start_strategy.py @@ -0,0 +1,74 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""Base turn start strategy for determining when the bot should start speaking.""" + +from typing import Optional + +from pipecat.frames.frames import Frame +from pipecat.utils.asyncio.task_manager import BaseTaskManager +from 
pipecat.utils.base_object import BaseObject + + +class BaseBotTurnStartStrategy(BaseObject): + """Base class for strategies that determine when the bot should start speaking. + + Subclasses should implement logic to detect when the bot should start + speaking. This could be based on analyzing incoming frames (such as + transcriptions), conversation state, or other heuristics. + + Events triggered by bot turn start strategies: + + - `on_push_frame`: Indicates the strategy wants to push a frame. + - `on_bot_turn_started`: Signals that the bot should start speaking. + + """ + + def __init__(self, **kwargs): + """Initialize the base bot turn start strategy.""" + super().__init__(**kwargs) + self._task_manager: Optional[BaseTaskManager] = None + self._register_event_handler("on_push_frame", sync=True) + self._register_event_handler("on_bot_turn_started", sync=True) + + @property + def task_manager(self) -> BaseTaskManager: + """Returns the configured task manager.""" + if not self._task_manager: + raise RuntimeError(f"{self} bot turn start strategy was not properly setup") + return self._task_manager + + async def reset(self): + """Reset the strategy to its initial state.""" + pass + + async def setup(self, task_manager: BaseTaskManager): + """Initialize the strategy with the given task manager. + + Args: + task_manager: The task manager to be associated with this instance. + """ + self._task_manager = task_manager + + async def cleanup(self): + """Cleanup the strategy.""" + pass + + async def process_frame(self, frame: Frame): + """Process an incoming frame to decide whether the bot should speak. + + Subclasses should override this to implement logic that decides whether + the bot turn has started. + + Args: + frame: The frame to be analyzed. 
+ + """ + pass + + async def trigger_bot_turn_started(self): + """Trigger the `on_bot_turn_started` event.""" + await self._call_event_handler("on_bot_turn_started") diff --git a/src/pipecat/turns/bot/transcription_bot_turn_start_strategy.py b/src/pipecat/turns/bot/transcription_bot_turn_start_strategy.py new file mode 100644 index 000000000..36541f765 --- /dev/null +++ b/src/pipecat/turns/bot/transcription_bot_turn_start_strategy.py @@ -0,0 +1,111 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""Transcription time-based speaking strategy.""" + +import asyncio +from typing import Optional + +from pipecat.frames.frames import ( + Frame, + TranscriptionFrame, + VADUserStartedSpeakingFrame, + VADUserStoppedSpeakingFrame, +) +from pipecat.turns.bot.base_bot_turn_start_strategy import BaseBotTurnStartStrategy +from pipecat.utils.asyncio.task_manager import BaseTaskManager + + +class TranscriptionBotTurnStartStrategy(BaseBotTurnStartStrategy): + """Bot turn start strategy based on transcriptions. + + This strategy assumes the bot should start speaking once a transcription + has been received and the user is not actively speaking. It handles + multiple or delayed transcription frames gracefully. + """ + + def __init__(self, *, timeout: float = 0.5): + """Initialize the transcription-based bot turn start strategy. + + Args: + timeout: A short delay used internally to handle consecutive or + slightly delayed transcriptions. + """ + super().__init__() + self._timeout = timeout + self._text = "" + self._vad_user_speaking = False + self._event = asyncio.Event() + self._task: Optional[asyncio.Task] = None + + async def reset(self): + """Reset the strategy to its initial state.""" + await super().reset() + self._text = "" + self._vad_user_speaking = False + self._event.clear() + + async def setup(self, task_manager: BaseTaskManager): + """Initialize the strategy with the given task manager. 
+ + Args: + task_manager: The task manager to be associated with this instance. + """ + await super().setup(task_manager) + self._task = task_manager.create_task(self._task_handler(), f"{self}::_task_handler") + + async def cleanup(self): + """Cleanup the strategy.""" + await super().cleanup() + if self._task: + await self.task_manager.cancel_task(self._task) + self._task = None + + async def process_frame(self, frame: Frame): + """Process an incoming frame to update strategy state. + + Updates internal transcription text and VAD state. The bot turn will be + triggered when appropriate based on the collected frames. + + Args: + frame: The frame to be analyzed. + + """ + if isinstance(frame, VADUserStartedSpeakingFrame): + await self._handle_vad_user_started_speaking(frame) + elif isinstance(frame, VADUserStoppedSpeakingFrame): + await self._handle_vad_user_stopped_speaking(frame) + elif isinstance(frame, TranscriptionFrame): + await self._handle_transcription(frame) + + async def _handle_vad_user_started_speaking(self, _: VADUserStartedSpeakingFrame): + """Handle when the VAD indicates the user is speaking.""" + self._vad_user_speaking = True + + async def _handle_vad_user_stopped_speaking(self, _: VADUserStoppedSpeakingFrame): + """Handle when the VAD indicates the user has stopped speaking.""" + self._vad_user_speaking = False + + async def _handle_transcription(self, frame: TranscriptionFrame): + """Handle user transcription.""" + self._text += frame.text + self._event.set() + + async def _task_handler(self): + """Asynchronously monitor transcriptions and trigger bot turn when ready. + + If transcription text exists and the user is not currently speaking, + triggers the bot turn. Handles multiple or delayed transcriptions + gracefully. 
+ + """ + while True: + try: + await asyncio.wait_for(self._event.wait(), timeout=self._timeout) + self._event.clear() + except asyncio.TimeoutError: + if self._text and not self._vad_user_speaking: + await self.trigger_bot_turn_started() diff --git a/src/pipecat/turns/bot/turn_analyzer_bot_turn_start_strategy.py b/src/pipecat/turns/bot/turn_analyzer_bot_turn_start_strategy.py new file mode 100644 index 000000000..4e09338fb --- /dev/null +++ b/src/pipecat/turns/bot/turn_analyzer_bot_turn_start_strategy.py @@ -0,0 +1,152 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""Bot turn start strategy based on turn detection analyzers.""" + +import asyncio +from typing import Optional + +from pipecat.audio.turn.base_turn_analyzer import BaseTurnAnalyzer, EndOfTurnState +from pipecat.frames.frames import ( + Frame, + InputAudioRawFrame, + InterimTranscriptionFrame, + MetricsFrame, + StartFrame, + TranscriptionFrame, + VADUserStartedSpeakingFrame, + VADUserStoppedSpeakingFrame, +) +from pipecat.metrics.metrics import MetricsData +from pipecat.processors.frame_processor import FrameDirection +from pipecat.turns.bot.base_bot_turn_start_strategy import BaseBotTurnStartStrategy +from pipecat.utils.asyncio.task_manager import BaseTaskManager + + +class TurnAnalyzerBotTurnStartStrategy(BaseBotTurnStartStrategy): + """Bot turn start strategy using a turn detection model to detect end of user turn. + + This strategy uses the turn detection models to determine when the user has + finished speaking, combining audio, VAD, and transcription frames. Once the + turn is considered complete, the bot turn is triggered. + + """ + + def __init__(self, *, turn_analyzer: BaseTurnAnalyzer, timeout: float = 0.5): + """Initialize the bot turn start strategy. + + Args: + turn_analyzer: The turn detection analyzer instance to detect end of user turn. + timeout: Short delay used internally to handle frame timing and event triggering. 
+ """ + super().__init__() + self._turn_analyzer = turn_analyzer + self._timeout = timeout + self._text = "" + self._vad_user_speaking = False + self._event = asyncio.Event() + self._task: Optional[asyncio.Task] = None + + async def reset(self): + """Reset the strategy to its initial state.""" + await super().reset() + self._text = "" + self._vad_user_speaking = False + self._event.set() + + async def setup(self, task_manager: BaseTaskManager): + """Initialize the strategy with the given task manager. + + Args: + task_manager: The task manager to be associated with this instance. + """ + await super().setup(task_manager) + self._task = task_manager.create_task(self._task_handler(), f"{self}::_task_handler") + + async def cleanup(self): + """Cleanup the strategy.""" + await super().cleanup() + if self._task: + await self.task_manager.cancel_task(self._task) + self._task = None + + async def process_frame(self, frame: Frame): + """Process an incoming frame to update the turn analyzer and strategy state. + + Args: + frame: The frame to be analyzed. 
+ """ + await super().process_frame(frame) + + if isinstance(frame, StartFrame): + await self._start(frame) + elif isinstance(frame, VADUserStartedSpeakingFrame): + await self._handle_vad_user_started_speaking(frame) + elif isinstance(frame, VADUserStoppedSpeakingFrame): + await self._handle_vad_user_stopped_speaking(frame) + elif isinstance(frame, InputAudioRawFrame): + await self._handle_input_audio(frame) + elif isinstance(frame, (TranscriptionFrame, InterimTranscriptionFrame)): + await self._handle_transcription(frame) + + async def _start(self, frame: StartFrame): + """Process the start frame to configure the turn analyzer.""" + self._turn_analyzer.set_sample_rate(frame.audio_in_sample_rate) + + async def _handle_input_audio(self, frame: InputAudioRawFrame): + """Handle input audio to check if the turn is completed.""" + state = self._turn_analyzer.append_audio(frame.audio, self._vad_user_speaking) + await self._handle_end_of_turn(state) + + async def _handle_vad_user_started_speaking(self, _: VADUserStartedSpeakingFrame): + """Handle when the VAD indicates the user is speaking.""" + self._vad_user_speaking = True + self._event.set() + + async def _handle_vad_user_stopped_speaking(self, _: VADUserStoppedSpeakingFrame): + """Handle when the VAD indicates the user has stopped speaking.""" + self._vad_user_speaking = False + self._event.set() + + state, prediction = await self._turn_analyzer.analyze_end_of_turn() + await self._handle_prediction_result(prediction) + await self._handle_end_of_turn(state) + + async def _handle_transcription(self, frame: TranscriptionFrame | InterimTranscriptionFrame): + """Handle user transcription.""" + # We don't really care about the content. 
+ self._text = frame.text + self._event.set() + + async def _handle_end_of_turn(self, state: EndOfTurnState): + """Handle completion of end-of-turn analysis.""" + if state == EndOfTurnState.COMPLETE: + self._event.set() + + async def _handle_prediction_result(self, result: Optional[MetricsData]): + """Handle a prediction result event from the turn analyzer.""" + if result: + await self._call_event_handler( + "on_push_frame", + MetricsFrame(data=[result]), + FrameDirection.DOWNSTREAM, + ) + + async def _task_handler(self): + """Asynchronously monitor events and trigger bot turn when appropriate. + + If we have not received a transcription in the specified amount of time + (and we initially received one) and the turn analyzer said the turn is + done, then the bot is ready to speak. + + """ + while True: + try: + await asyncio.wait_for(self._event.wait(), timeout=self._timeout) + self._event.clear() + except asyncio.TimeoutError: + if self._text: + await self.trigger_bot_turn_started() diff --git a/src/pipecat/turns/user/__init__.py b/src/pipecat/turns/user/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/pipecat/turns/user/base_user_turn_start_strategy.py b/src/pipecat/turns/user/base_user_turn_start_strategy.py new file mode 100644 index 000000000..216932f45 --- /dev/null +++ b/src/pipecat/turns/user/base_user_turn_start_strategy.py @@ -0,0 +1,73 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""Base turn start strategy for determining when the user starts speaking.""" + +from typing import Optional + +from pipecat.frames.frames import Frame +from pipecat.utils.asyncio.task_manager import BaseTaskManager +from pipecat.utils.base_object import BaseObject + + +class BaseUserTurnStartStrategy(BaseObject): + """Base class for strategies that determine when a user starts speaking. + + Subclasses should implement logic to detect the start of a user's turn. 
+ This could be based on voice activity, number of words spoken, or other + heuristics. + + Events triggered by user turn start strategies: + + - `on_push_frame`: Indicates the strategy wants to push a frame. + - `on_user_turn_started`: Signals that a user turn has started. + """ + + def __init__(self, **kwargs): + """Initialize the base user turn start strategy.""" + super().__init__(**kwargs) + self._task_manager: Optional[BaseTaskManager] = None + self._register_event_handler("on_push_frame", sync=True) + self._register_event_handler("on_user_turn_started", sync=True) + + @property + def task_manager(self) -> BaseTaskManager: + """Returns the configured task manager.""" + if not self._task_manager: + raise RuntimeError(f"{self} user turn start strategy was not properly setup") + return self._task_manager + + async def reset(self): + """Reset the strategy to its initial state.""" + pass + + async def setup(self, task_manager: BaseTaskManager): + """Initialize the strategy with the given task manager. + + Args: + task_manager: The task manager to be associated with this instance. + """ + self._task_manager = task_manager + + async def cleanup(self): + """Cleanup the strategy.""" + pass + + async def process_frame(self, frame: Frame): + """Process an incoming frame. + + Subclasses should override this to implement logic that decides whether + the user turn has started. + + Args: + frame: The frame to be processed. 
+ + """ + pass + + async def trigger_user_turn_started(self): + """Trigger the `on_user_turn_started` event.""" + await self._call_event_handler("on_user_turn_started") diff --git a/src/pipecat/turns/user/min_words_user_turn_start_strategy.py b/src/pipecat/turns/user/min_words_user_turn_start_strategy.py new file mode 100644 index 000000000..61204aec8 --- /dev/null +++ b/src/pipecat/turns/user/min_words_user_turn_start_strategy.py @@ -0,0 +1,91 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""User turn start strategy based on a minimum number of words spoken by the user.""" + +from loguru import logger + +from pipecat.frames.frames import Frame, InterimTranscriptionFrame, TranscriptionFrame +from pipecat.turns.user.base_user_turn_start_strategy import BaseUserTurnStartStrategy + + +class MinWordsUserTurnStartStrategy(BaseUserTurnStartStrategy): + """User turn start strategy based on a minimum number of words spoken by the user. + + This strategy signals the start of a user turn once the user has spoken at + least a specified number of words, as determined from transcription frames. + Optionally, interim transcriptions can be used for earlier detection. + + """ + + def __init__(self, *, min_words: int, use_interim: bool = True): + """Initialize the minimum words user turn start strategy. + + Args: + min_words: Minimum number of spoken words required to trigger the + start of a user turn. + use_interim: Whether to consider interim transcription frames for + earlier detection. + """ + super().__init__() + self._min_words = min_words + self._use_interim = use_interim + self._text = "" + + async def reset(self): + """Reset the strategy to its initial state.""" + await super().reset() + self._text = "" + + async def process_frame(self, frame: Frame): + """Process an incoming frame to detect the start of a user turn. 
+ + This method updates internal state based on transcription frames and + triggers the user turn once the minimum word count is reached. + + Args: + frame: The frame to be analyzed. + """ + await super().process_frame(frame) + + if isinstance(frame, TranscriptionFrame): + await self._handle_transcription(frame) + elif isinstance(frame, InterimTranscriptionFrame) and self._use_interim: + await self._handle_interim_transcription(frame) + + async def _handle_transcription(self, frame: TranscriptionFrame): + """Handle a completed transcription frame and check word count. + + Args: + frame: The transcription frame to be processed. + """ + self._text += frame.text + + word_count = len(self._text.split()) + should_trigger = word_count >= self._min_words + + logger.debug( + f"{self} should_trigger={should_trigger} num_spoken_words={word_count} min_words={self._min_words}" + ) + + if should_trigger: + await self.trigger_user_turn_started() + + async def _handle_interim_transcription(self, frame: InterimTranscriptionFrame): + """Handle an interim transcription frame and check word count. + + Args: + frame: The interim transcription frame to be processed. 
+ """ + word_count = len(frame.text.split()) + should_trigger = word_count >= self._min_words + + logger.debug( + f"{self} interim=True should_trigger={should_trigger} num_spoken_words={word_count} min_words={self._min_words}" + ) + + if should_trigger: + await self.trigger_user_turn_started() diff --git a/src/pipecat/turns/user/vad_user_turn_start_strategy.py b/src/pipecat/turns/user/vad_user_turn_start_strategy.py new file mode 100644 index 000000000..3a0e491c2 --- /dev/null +++ b/src/pipecat/turns/user/vad_user_turn_start_strategy.py @@ -0,0 +1,30 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""User turn start strategy based on VAD events.""" + +from pipecat.frames.frames import Frame, VADUserStartedSpeakingFrame +from pipecat.turns.user.base_user_turn_start_strategy import BaseUserTurnStartStrategy + + +class VADUserTurnStartStrategy(BaseUserTurnStartStrategy): + """User turn start strategy based on VAD (Voice Activity Detection). + + This strategy assumes the user turn starts as soon as a VAD frame indicates + that the user has started speaking. + + """ + + async def process_frame(self, frame: Frame): + """Process an incoming frame to detect user turn start. + + Args: + frame: The frame to be analyzed. 
+ """ + await super().process_frame(frame) + + if isinstance(frame, VADUserStartedSpeakingFrame): + await self.trigger_user_turn_started() From 5dd3af25acd85538395453b529df816ef44b3dfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Wed, 12 Nov 2025 15:26:57 -0800 Subject: [PATCH 02/30] frames: add turn start strategies to StartFrame --- src/pipecat/frames/frames.py | 2 ++ src/pipecat/turns/turn_start_strategies.py | 31 ++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 src/pipecat/turns/turn_start_strategies.py diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index 489fd53ad..03ad7d41b 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -40,6 +40,7 @@ from pipecat.utils.utils import obj_count, obj_id if TYPE_CHECKING: from pipecat.processors.aggregators.llm_context import LLMContext, LLMContextMessage, NotGiven from pipecat.processors.frame_processor import FrameProcessor + from pipecat.turns.turn_start_strategies import TurnStartStrategies class DeprecatedKeypadEntry: @@ -959,6 +960,7 @@ class StartFrame(SystemFrame): enable_tracing: bool = False enable_usage_metrics: bool = False interruption_strategies: List[BaseInterruptionStrategy] = field(default_factory=list) + turn_start_strategies: Optional["TurnStartStrategies"] = None report_only_initial_ttfb: bool = False diff --git a/src/pipecat/turns/turn_start_strategies.py b/src/pipecat/turns/turn_start_strategies.py new file mode 100644 index 000000000..a3a344ef3 --- /dev/null +++ b/src/pipecat/turns/turn_start_strategies.py @@ -0,0 +1,31 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""Turn start strategy configuration.""" + +from dataclasses import dataclass +from typing import List + +from pipecat.turns.bot.base_bot_turn_start_strategy import BaseBotTurnStartStrategy +from pipecat.turns.user.base_user_turn_start_strategy import 
BaseUserTurnStartStrategy + + +@dataclass +class TurnStartStrategies: + """Container for user and bot turn start strategies. + + This class groups the configured turn start strategies for both the user + and the bot. + + Attributes: + user: A list of user turn start strategies used to detect when the + user starts speaking. + bot: A list of bot turn start strategies used to decide when the bot + should start speaking. + """ + + user: List[BaseUserTurnStartStrategy] + bot: List[BaseBotTurnStartStrategy] From 6a62c8d6da75cb3075f835439f018731699e231b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Wed, 12 Nov 2025 15:27:30 -0800 Subject: [PATCH 03/30] FrameProcessor: add user and bot turn start strategies --- src/pipecat/processors/frame_processor.py | 31 ++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/src/pipecat/processors/frame_processor.py b/src/pipecat/processors/frame_processor.py index ed77506dd..ddb546ef7 100644 --- a/src/pipecat/processors/frame_processor.py +++ b/src/pipecat/processors/frame_processor.py @@ -15,7 +15,18 @@ import asyncio import traceback from dataclasses import dataclass from enum import Enum -from typing import Any, Awaitable, Callable, Coroutine, List, Optional, Sequence, Tuple, Type +from typing import ( + TYPE_CHECKING, + Any, + Awaitable, + Callable, + Coroutine, + List, + Optional, + Sequence, + Tuple, + Type, +) from loguru import logger @@ -41,6 +52,9 @@ from pipecat.processors.metrics.frame_processor_metrics import FrameProcessorMet from pipecat.utils.asyncio.task_manager import BaseTaskManager from pipecat.utils.base_object import BaseObject +if TYPE_CHECKING: + from pipecat.turns.turn_start_strategies import TurnStartStrategies + class FrameDirection(Enum): """Direction of frame flow in the processing pipeline. 
@@ -185,6 +199,7 @@ class FrameProcessor(BaseObject): self._enable_usage_metrics = False self._report_only_initial_ttfb = False self._interruption_strategies: List[BaseInterruptionStrategy] = [] + self._turn_start_strategies: Optional["TurnStartStrategies"] = None # Indicates whether we have received the StartFrame. self.__started = False @@ -344,11 +359,24 @@ class FrameProcessor(BaseObject): def interruption_strategies(self) -> Sequence[BaseInterruptionStrategy]: """Get the interruption strategies for this processor. + .. deprecated:: 0.0.98 + This function is deprecated, use the new user and bot turn start + strategies instead. + Returns: Sequence of interruption strategies. """ return self._interruption_strategies + @property + def turn_start_strategies(self) -> Optional["TurnStartStrategies"]: + """Get the user and bot turn start strategies for this processor. + + Returns: + The user and bot turn start strategies. + """ + return self._turn_start_strategies + @property def task_manager(self) -> BaseTaskManager: """Get the task manager for this processor. 
@@ -763,6 +791,7 @@ class FrameProcessor(BaseObject): self._enable_metrics = frame.enable_metrics self._enable_usage_metrics = frame.enable_usage_metrics self._interruption_strategies = frame.interruption_strategies + self._turn_start_strategies = frame.turn_start_strategies self._report_only_initial_ttfb = frame.report_only_initial_ttfb self.__create_process_task() From 0f6668d41b8c75f4c02ea958914e3698b4706d63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Wed, 12 Nov 2025 15:27:49 -0800 Subject: [PATCH 04/30] PipelineTask: pass turn start strategies to StartFrame --- src/pipecat/pipeline/task.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/pipecat/pipeline/task.py b/src/pipecat/pipeline/task.py index 90976b52c..388723acd 100644 --- a/src/pipecat/pipeline/task.py +++ b/src/pipecat/pipeline/task.py @@ -47,6 +47,11 @@ from pipecat.pipeline.base_task import BasePipelineTask, PipelineTaskParams from pipecat.pipeline.pipeline import Pipeline, PipelineSink, PipelineSource from pipecat.pipeline.task_observer import TaskObserver from pipecat.processors.frame_processor import FrameDirection, FrameProcessor, FrameProcessorSetup +from pipecat.turns.bot.transcription_bot_turn_start_strategy import ( + TranscriptionBotTurnStartStrategy, +) +from pipecat.turns.turn_start_strategies import TurnStartStrategies +from pipecat.turns.user.vad_user_turn_start_strategy import VADUserTurnStartStrategy from pipecat.utils.asyncio.task_manager import BaseTaskManager, TaskManager, TaskManagerParams from pipecat.utils.tracing.setup import is_tracing_available from pipecat.utils.tracing.turn_trace_observer import TurnTraceObserver @@ -111,7 +116,12 @@ class PipelineParams(BaseModel): enable_metrics: Whether to enable metrics collection. enable_usage_metrics: Whether to enable usage metrics. heartbeats_period_secs: Period between heartbeats in seconds. 
- interruption_strategies: Strategies for bot interruption behavior. + interruption_strategies: [deprecated] Strategies for bot interruption behavior. + + .. deprecated:: 0.0.99 + Use the `turn_start_strategies` instead. + + turn_start_strategies: User and bot turn start strategies. observers: [deprecated] Use `observers` arg in `PipelineTask` class. .. deprecated:: 0.0.58 @@ -132,6 +142,7 @@ class PipelineParams(BaseModel): enable_usage_metrics: bool = False heartbeats_period_secs: float = HEARTBEAT_SECS interruption_strategies: List[BaseInterruptionStrategy] = Field(default_factory=list) + turn_start_strategies: Optional[TurnStartStrategies] = None observers: List[BaseObserver] = Field(default_factory=list) report_only_initial_ttfb: bool = False send_initial_empty_metrics: bool = True @@ -278,6 +289,14 @@ class PipelineTask(BasePipelineTask): additional_span_attributes=self._additional_span_attributes, ) observers.append(self._turn_trace_observer) + + # Initialize default user and bot turn start strategies. 
+ if not self._params.turn_start_strategies: + self._params.turn_start_strategies = TurnStartStrategies( + user=[VADUserTurnStartStrategy()], + bot=[TranscriptionBotTurnStartStrategy()], + ) + self._finished = False self._cancelled = False @@ -694,6 +713,7 @@ class PipelineTask(BasePipelineTask): enable_usage_metrics=self._params.enable_usage_metrics, report_only_initial_ttfb=self._params.report_only_initial_ttfb, interruption_strategies=self._params.interruption_strategies, + turn_start_strategies=self._params.turn_start_strategies, ) start_frame.metadata = self._params.start_metadata await self._pipeline.queue_frame(start_frame) From 223052e6e7e4c33749e7c673966cac225a6145d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Wed, 12 Nov 2025 15:30:05 -0800 Subject: [PATCH 05/30] LLMUserAggregator: use new user and bot turn start strategies --- .../aggregators/llm_response_universal.py | 319 +++++------------- 1 file changed, 88 insertions(+), 231 deletions(-) diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index 810280729..df3f707c3 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -20,24 +20,16 @@ from typing import Any, Dict, List, Literal, Optional, Set from loguru import logger from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.interruptions.base_interruption_strategy import BaseInterruptionStrategy -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams -from pipecat.audio.vad.vad_analyzer import VADParams from pipecat.frames.frames import ( AssistantImageRawFrame, - BotStartedSpeakingFrame, BotStoppedSpeakingFrame, CancelFrame, - EmulateUserStartedSpeakingFrame, - EmulateUserStoppedSpeakingFrame, EndFrame, Frame, FunctionCallCancelFrame, FunctionCallInProgressFrame, FunctionCallResultFrame, 
FunctionCallsStartedFrame, - InputAudioRawFrame, - InterimTranscriptionFrame, InterruptionFrame, LLMContextAssistantTimestampFrame, LLMContextFrame, @@ -51,7 +43,6 @@ from pipecat.frames.frames import ( LLMThoughtEndFrame, LLMThoughtStartFrame, LLMThoughtTextFrame, - SpeechControlParamsFrame, StartFrame, TextFrame, TranscriptionFrame, @@ -70,6 +61,8 @@ from pipecat.processors.aggregators.llm_response import ( LLMUserAggregatorParams, ) from pipecat.processors.frame_processor import FrameDirection, FrameProcessor +from pipecat.turns.bot.base_bot_turn_start_strategy import BaseBotTurnStartStrategy +from pipecat.turns.user.base_user_turn_start_strategy import BaseUserTurnStartStrategy from pipecat.utils.string import TextPartForConcatenation, concatenate_aggregated_text from pipecat.utils.time import time_now_iso8601 @@ -222,36 +215,23 @@ class LLMUserAggregator(LLMContextAggregator): """ super().__init__(context=context, role="user", **kwargs) self._params = params or LLMUserAggregatorParams() - self._vad_params: Optional[VADParams] = None - self._turn_params: Optional[SmartTurnParams] = None - - if "aggregation_timeout" in kwargs: - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "Parameter 'aggregation_timeout' is deprecated, use 'params' instead.", - DeprecationWarning, - ) - - self._params.aggregation_timeout = kwargs["aggregation_timeout"] self._user_speaking = False - self._bot_speaking = False - self._was_bot_speaking = False - self._emulating_vad = False - self._seen_interim_results = False - self._waiting_for_aggregation = False - self._aggregation_event = asyncio.Event() - self._aggregation_task = None + async def cleanup(self): + """Clean up processor resources.""" + await super().cleanup() + await self._cleanup() async def reset(self): """Reset the aggregation state and interruption strategies.""" await super().reset() - self._was_bot_speaking = False - self._seen_interim_results = False - self._waiting_for_aggregation 
= False - [await s.reset() for s in self._interruption_strategies] + + if self.turn_start_strategies: + for s in self.turn_start_strategies.user: + await s.reset() + for s in self.turn_start_strategies.bot: + await s.reset() async def process_frame(self, frame: Frame, direction: FrameDirection): """Process frames for user speech aggregation and context management. @@ -275,25 +255,8 @@ class LLMUserAggregator(LLMContextAggregator): elif isinstance(frame, CancelFrame): await self._cancel(frame) await self.push_frame(frame, direction) - elif isinstance(frame, InputAudioRawFrame): - await self._handle_input_audio(frame) - await self.push_frame(frame, direction) - elif isinstance(frame, UserStartedSpeakingFrame): - await self._handle_user_started_speaking(frame) - await self.push_frame(frame, direction) - elif isinstance(frame, UserStoppedSpeakingFrame): - await self._handle_user_stopped_speaking(frame) - await self.push_frame(frame, direction) - elif isinstance(frame, BotStartedSpeakingFrame): - await self._handle_bot_started_speaking(frame) - await self.push_frame(frame, direction) - elif isinstance(frame, BotStoppedSpeakingFrame): - await self._handle_bot_stopped_speaking(frame) - await self.push_frame(frame, direction) elif isinstance(frame, TranscriptionFrame): await self._handle_transcription(frame) - elif isinstance(frame, InterimTranscriptionFrame): - await self._handle_interim_transcription(frame) elif isinstance(frame, LLMRunFrame): await self._handle_llm_run(frame) elif isinstance(frame, LLMMessagesAppendFrame): @@ -310,76 +273,55 @@ class LLMUserAggregator(LLMContextAggregator): await self.push_frame(frame, direction) elif isinstance(frame, LLMSetToolChoiceFrame): self.set_tool_choice(frame.tool_choice) - elif isinstance(frame, SpeechControlParamsFrame): - self._vad_params = frame.vad_params - self._turn_params = frame.turn_params - await self.push_frame(frame, direction) else: await self.push_frame(frame, direction) - async def _process_aggregation(self): - 
"""Process the current aggregation and push it downstream.""" + await self._turn_start_strategies_process_frame(frame) + + async def push_aggregation(self): + """Push the current aggregation.""" + if len(self._aggregation) == 0: + return + aggregation = self.aggregation_string() await self.reset() self._context.add_message({"role": self.role, "content": aggregation}) - frame = LLMContextFrame(self._context) - await self.push_frame(frame) - - async def push_aggregation(self): - """Push the current aggregation based on interruption strategies and conditions.""" - if len(self._aggregation) > 0: - if self.interruption_strategies and self._bot_speaking: - should_interrupt = await self._should_interrupt_based_on_strategies() - - if should_interrupt: - logger.debug( - "Interruption conditions met - pushing interruption and aggregation" - ) - await self.push_interruption_task_frame_and_wait() - await self._process_aggregation() - else: - logger.debug("Interruption conditions not met - not pushing aggregation") - # Don't process aggregation, just reset it - await self.reset() - else: - # No interruption config - normal behavior (always push aggregation) - await self._process_aggregation() - # Handles the case where both the user and the bot are not speaking, - # and the bot was previously speaking before the user interruption. - # Normally, when the user stops speaking, new text is expected, - # which triggers the bot to respond. However, if no new text - # is received, this safeguard ensures - # the bot doesn't hang indefinitely while waiting to speak again. 
- elif not self._seen_interim_results and self._was_bot_speaking and not self._bot_speaking: - logger.warning("User stopped speaking but no new aggregation received.") - # Resetting it so we don't trigger this twice - self._was_bot_speaking = False - # TODO: we are not enabling this for now, due to some STT services which can take as long as 2 seconds two return a transcription - # So we need more tests and probably make this feature configurable, disabled it by default. - # We are just pushing the same previous context to be processed again in this case - # await self.push_frame(LLMContextFrame(self._context)) - - async def _should_interrupt_based_on_strategies(self) -> bool: - """Check if interruption should occur based on configured strategies. - - Returns: - True if any interruption strategy indicates interruption should occur. - """ - - async def should_interrupt(strategy: BaseInterruptionStrategy): - await strategy.append_text(self.aggregation_string()) - return await strategy.should_interrupt() - - return any([await should_interrupt(s) for s in self._interruption_strategies]) + await self.push_context_frame() async def _start(self, frame: StartFrame): - self._create_aggregation_task() + if not self.turn_start_strategies: + return + + for s in self.turn_start_strategies.user: + await s.setup(self.task_manager) + s.add_event_handler("on_push_frame", self._on_push_frame) + s.add_event_handler("on_user_turn_started", self._on_user_turn_started) + + for s in self.turn_start_strategies.bot: + await s.setup(self.task_manager) + s.add_event_handler("on_push_frame", self._on_push_frame) + s.add_event_handler("on_bot_turn_started", self._on_bot_turn_started) async def _stop(self, frame: EndFrame): - await self._cancel_aggregation_task() + await self._cleanup() async def _cancel(self, frame: CancelFrame): - await self._cancel_aggregation_task() + await self._cleanup() + + async def _cleanup(self): + if self.turn_start_strategies: + for s in 
self.turn_start_strategies.user: + await s.cleanup() + for s in self.turn_start_strategies.bot: + await s.cleanup() + + async def _turn_start_strategies_process_frame(self, frame: Frame): + if self.turn_start_strategies: + for strategy in self.turn_start_strategies.user: + await strategy.process_frame(frame) + + for strategy in self.turn_start_strategies.bot: + await strategy.process_frame(frame) async def _handle_llm_run(self, frame: LLMRunFrame): await self.push_context_frame() @@ -394,42 +336,6 @@ class LLMUserAggregator(LLMContextAggregator): if frame.run_llm: await self.push_context_frame() - async def _handle_input_audio(self, frame: InputAudioRawFrame): - for s in self.interruption_strategies: - await s.append_audio(frame.audio, frame.sample_rate) - - async def _handle_user_started_speaking(self, frame: UserStartedSpeakingFrame): - self._user_speaking = True - self._waiting_for_aggregation = True - self._was_bot_speaking = self._bot_speaking - - # If we get a non-emulated UserStartedSpeakingFrame but we are in the - # middle of emulating VAD, let's stop emulating VAD (i.e. don't send the - # EmulateUserStoppedSpeakingFrame). - if not frame.emulated and self._emulating_vad: - self._emulating_vad = False - - async def _handle_user_stopped_speaking(self, _: UserStoppedSpeakingFrame): - self._user_speaking = False - # We just stopped speaking. Let's see if there's some aggregation to - # push. If the last thing we saw is an interim transcription, let's wait - # pushing the aggregation as we will probably get a final transcription. - if len(self._aggregation) > 0: - if not self._seen_interim_results: - await self.push_aggregation() - # Handles the case where both the user and the bot are not speaking, - # and the bot was previously speaking before the user interruption. - # So in this case we are resetting the aggregation timer - elif not self._seen_interim_results and self._was_bot_speaking and not self._bot_speaking: - # Reset aggregation timer. 
- self._aggregation_event.set() - - async def _handle_bot_started_speaking(self, _: BotStartedSpeakingFrame): - self._bot_speaking = True - - async def _handle_bot_stopped_speaking(self, _: BotStoppedSpeakingFrame): - self._bot_speaking = False - async def _handle_transcription(self, frame: TranscriptionFrame): text = frame.text @@ -443,101 +349,52 @@ class LLMUserAggregator(LLMContextAggregator): text, includes_inter_part_spaces=frame.includes_inter_frame_spaces ) ) - # We just got a final result, so let's reset interim results. - self._seen_interim_results = False - # Reset aggregation timer. - self._aggregation_event.set() - async def _handle_interim_transcription(self, _: InterimTranscriptionFrame): - self._seen_interim_results = True + async def _on_user_turn_started(self, strategy: BaseUserTurnStartStrategy): + await self._trigger_user_turn_start(strategy) - def _create_aggregation_task(self): - if not self._aggregation_task: - self._aggregation_task = self.create_task(self._aggregation_task_handler()) + async def _on_bot_turn_started(self, strategy: BaseBotTurnStartStrategy): + await self._trigger_bot_turn_start(strategy) - async def _cancel_aggregation_task(self): - if self._aggregation_task: - await self.cancel_task(self._aggregation_task) - self._aggregation_task = None + async def _on_push_frame( + self, + strategy: BaseUserTurnStartStrategy | BaseBotTurnStartStrategy, + frame: Frame, + direction: FrameDirection, + ): + await self.push_frame(frame, direction) - async def _aggregation_task_handler(self): - while True: - try: - # The _aggregation_task_handler handles two distinct timeout scenarios: - # - # 1. When emulating_vad=True: Wait for emulated VAD timeout before - # pushing aggregation (simulating VAD behavior when no actual VAD - # detection occurred). - # - # 2. When emulating_vad=False: Use aggregation_timeout as a buffer - # to wait for potential late-arriving transcription frames after - # a real VAD event. 
- # - # For emulated VAD scenarios, the timeout strategy depends on whether - # a turn analyzer is configured: - # - # - WITH turn analyzer: Use turn_emulated_vad_timeout parameter because - # the VAD's stop_secs is set very low (e.g. 0.2s) for rapid speech - # chunking to feed the turn analyzer. This low value is too fast - # for emulated VAD scenarios where we need to allow users time to - # finish speaking (e.g. 0.8s). - # - # - WITHOUT turn analyzer: Use VAD's stop_secs directly to maintain - # consistent user experience between real VAD detection and - # emulated VAD scenarios. - if not self._emulating_vad: - timeout = self._params.aggregation_timeout - elif self._turn_params: - timeout = self._params.turn_emulated_vad_timeout - else: - # Use VAD stop_secs when no turn analyzer is present, fallback if no VAD params - timeout = ( - self._vad_params.stop_secs - if self._vad_params - else self._params.turn_emulated_vad_timeout - ) - await asyncio.wait_for(self._aggregation_event.wait(), timeout=timeout) - await self._maybe_emulate_user_speaking() - except asyncio.TimeoutError: - if not self._user_speaking: - await self.push_aggregation() + async def _trigger_user_turn_start(self, strategy: BaseUserTurnStartStrategy): + if self._user_speaking: + return - # If we are emulating VAD we still need to send the user stopped - # speaking frame. - if self._emulating_vad: - await self.push_frame( - EmulateUserStoppedSpeakingFrame(), FrameDirection.UPSTREAM - ) - self._emulating_vad = False - finally: - self._aggregation_event.clear() + self._user_speaking = True - async def _maybe_emulate_user_speaking(self): - """Maybe emulate user speaking based on transcription. + logger.debug(f"User started speaking (user turn start strategy: {strategy})") - Emulate user speaking if we got a transcription but it was not - detected by VAD. Behavior when bot is speaking depends on the - enable_emulated_vad_interruptions parameter. 
- """ - # Check if we received a transcription but VAD was not able to detect - # voice (e.g. when you whisper a short utterance). In that case, we need - # to emulate VAD (i.e. user start/stopped speaking), but we do it only - # if the bot is not speaking. If the bot is speaking and we really have - # a short utterance we don't really want to interrupt the bot. - if ( - not self._user_speaking - and not self._waiting_for_aggregation - and len(self._aggregation) > 0 - ): - if self._bot_speaking and not self._params.enable_emulated_vad_interruptions: - # If emulated VAD interruptions are disabled and bot is speaking, ignore - logger.debug("Ignoring user speaking emulation, bot is speaking.") - await self.reset() - else: - # Either bot is not speaking, or emulated VAD interruptions are enabled - # - trigger user speaking emulation. - await self.push_frame(EmulateUserStartedSpeakingFrame(), FrameDirection.UPSTREAM) - self._emulating_vad = True + # Reset all user turn start strategies to start fresh. + if self.turn_start_strategies: + for s in self.turn_start_strategies.user: + await s.reset() + + await self.push_frame(UserStartedSpeakingFrame()) + await self.push_frame(InterruptionFrame()) + + async def _trigger_bot_turn_start(self, strategy: BaseBotTurnStartStrategy): + if not self._user_speaking: + return + + self._user_speaking = False + + logger.debug(f"User stopped speaking (bot turn start strategy: {strategy})") + + # Reset all bot turn start strategies to start fresh. 
+ if self.turn_start_strategies: + for s in self.turn_start_strategies.bot: + await s.reset() + + await self.push_frame(UserStoppedSpeakingFrame()) + await self.push_aggregation() class LLMAssistantAggregator(LLMContextAggregator): From 359ac302f541f47ff0e8994d83034bbff3b2461c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Tue, 16 Dec 2025 10:33:38 -0800 Subject: [PATCH 06/30] audio(interruptions): deprecate MinWordsInterruptionStrategy --- .../audio/interruptions/min_words_interruption_strategy.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/pipecat/audio/interruptions/min_words_interruption_strategy.py b/src/pipecat/audio/interruptions/min_words_interruption_strategy.py index 3f2dd5825..dad1277eb 100644 --- a/src/pipecat/audio/interruptions/min_words_interruption_strategy.py +++ b/src/pipecat/audio/interruptions/min_words_interruption_strategy.py @@ -17,6 +17,13 @@ class MinWordsInterruptionStrategy(BaseInterruptionStrategy): This is an interruption strategy based on a minimum number of words said by the user. That is, the strategy will be true if the user has said at least that amount of words. + + .. deprecated:: 0.0.99 + + This class is deprecated, use + `pipecat.turns.user.MinWordsUserTurnStartStrategy` with `PipelineTask`'s + new `turn_start_strategies` parameter instead. 
+ """ def __init__(self, *, min_words: int): From 355fcf328238594ca451d2e5f4aa29ee6840b527 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Wed, 12 Nov 2025 15:33:24 -0800 Subject: [PATCH 07/30] BaseInputTransport: deprecate the use of turn analyzer in transport --- .../min_words_interruption_strategy.py | 10 + src/pipecat/transports/base_input.py | 257 +++++++++++------- src/pipecat/transports/base_transport.py | 4 + 3 files changed, 175 insertions(+), 96 deletions(-) diff --git a/src/pipecat/audio/interruptions/min_words_interruption_strategy.py b/src/pipecat/audio/interruptions/min_words_interruption_strategy.py index dad1277eb..f89026cc4 100644 --- a/src/pipecat/audio/interruptions/min_words_interruption_strategy.py +++ b/src/pipecat/audio/interruptions/min_words_interruption_strategy.py @@ -36,6 +36,16 @@ class MinWordsInterruptionStrategy(BaseInterruptionStrategy): self._min_words = min_words self._text = "" + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "'pipecat.audio.interruptions' is deprecated. " + "Use the new interruption and speaking strategies.", + DeprecationWarning, + ) + async def append_text(self, text: str): """Append text for word count analysis. 
diff --git a/src/pipecat/transports/base_input.py b/src/pipecat/transports/base_input.py index e66007e04..c69328e9f 100644 --- a/src/pipecat/transports/base_input.py +++ b/src/pipecat/transports/base_input.py @@ -127,6 +127,17 @@ class BaseInputTransport(FrameProcessor): self._params.video_out_framerate = self._params.camera_out_framerate self._params.video_out_color_format = self._params.camera_out_color_format + if self._params.turn_analyzer: + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "Parameter 'turn_analyzer' is deprecated, use `PipelineTask`'s new " + "`turn_start_strategies` parameter instead.", + DeprecationWarning, + ) + def enable_audio_in_stream_on_start(self, enabled: bool) -> None: """Enable or disable audio streaming on transport start. @@ -165,9 +176,28 @@ class BaseInputTransport(FrameProcessor): def turn_analyzer(self) -> Optional[BaseTurnAnalyzer]: """Get the turn-taking analyzer. + .. deprecated:: 0.0.99 + This method is deprecated and will be removed in a future version. + Use `PipelineTask`'s new `turn_start_strategies` parameter instead. + Returns: The turn analyzer instance if configured, None otherwise. """ + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "Method 'turn_analyzer' is deprecated. Use `PipelineTask`'s new " + " `turn_start_strategies` parameter instead.", + DeprecationWarning, + ) + + logger.warning( + f"{self}: method 'turn_analyzer' is deprecated. Use `PipelineTask`'s new " + "`turn_start_strategies` parameter instead." 
+ ) + return self._params.turn_analyzer async def start(self, frame: StartFrame): @@ -286,17 +316,17 @@ class BaseInputTransport(FrameProcessor): await self.cancel(frame) await self.push_frame(frame, direction) elif isinstance(frame, BotStartedSpeakingFrame): - await self._handle_bot_started_speaking(frame) + await self._deprecated_handle_bot_started_speaking(frame) await self.push_frame(frame, direction) elif isinstance(frame, BotStoppedSpeakingFrame): - await self._handle_bot_stopped_speaking(frame) + await self._deprecated_handle_bot_stopped_speaking(frame) await self.push_frame(frame, direction) elif isinstance(frame, EmulateUserStartedSpeakingFrame): logger.debug("Emulating user started speaking") - await self._handle_user_interruption(VADState.SPEAKING, emulated=True) + await self._deprecated_handle_user_interruption(VADState.SPEAKING, emulated=True) elif isinstance(frame, EmulateUserStoppedSpeakingFrame): logger.debug("Emulating user stopped speaking") - await self._handle_user_interruption(VADState.QUIET, emulated=True) + await self._deprecated_handle_user_interruption(VADState.QUIET, emulated=True) # All other system frames elif isinstance(frame, SystemFrame): await self.push_frame(frame, direction) @@ -326,10 +356,116 @@ class BaseInputTransport(FrameProcessor): await self.push_frame(frame, direction) # - # Handle interruptions + # Audio input # - async def _handle_user_interruption(self, vad_state: VADState, emulated: bool = False): + def _create_audio_task(self): + """Create the audio processing task if audio input is enabled.""" + if not self._audio_task and self._params.audio_in_enabled: + self._audio_in_queue = asyncio.Queue() + self._audio_task = self.create_task(self._audio_task_handler()) + + async def _cancel_audio_task(self): + """Cancel and cleanup the audio processing task.""" + if self._audio_task: + await self.cancel_task(self._audio_task) + self._audio_task = None + + async def _vad_analyze(self, audio_frame: InputAudioRawFrame) -> 
VADState: + """Analyze audio frame for voice activity.""" + state = VADState.QUIET + if self.vad_analyzer: + state = await self.vad_analyzer.analyze_audio(audio_frame.audio) + return state + + async def _new_handle_vad( + self, audio_frame: InputAudioRawFrame, vad_state: VADState + ) -> VADState: + """Handle Voice Activity Detection results and generate appropriate frames.""" + new_vad_state = await self._vad_analyze(audio_frame) + if ( + new_vad_state != vad_state + and new_vad_state != VADState.STARTING + and new_vad_state != VADState.STOPPING + ): + if new_vad_state == VADState.SPEAKING: + await self.push_frame(VADUserStartedSpeakingFrame()) + elif new_vad_state == VADState.QUIET: + await self.push_frame(VADUserStoppedSpeakingFrame()) + + vad_state = new_vad_state + return vad_state + + async def _handle_vad(self, audio_frame: InputAudioRawFrame, vad_state: VADState) -> VADState: + """Handle Voice Activity Detection results and generate appropriate frames.""" + if self._params.turn_analyzer: + return await self._deprecated_handle_vad(audio_frame, vad_state) + else: + return await self._new_handle_vad(audio_frame, vad_state) + + async def _audio_task_handler(self): + """Main audio processing task handler for VAD and turn analysis.""" + vad_state: VADState = VADState.QUIET + while True: + try: + frame: InputAudioRawFrame = await asyncio.wait_for( + self._audio_in_queue.get(), timeout=AUDIO_INPUT_TIMEOUT_SECS + ) + + # If an audio filter is available, run it before VAD. + if self._params.audio_in_filter: + frame.audio = await self._params.audio_in_filter.filter(frame.audio) + + # Check VAD and push event if necessary. We just care about + # changes from QUIET to SPEAKING and vice versa. + previous_vad_state = vad_state + if self._params.vad_analyzer: + vad_state = await self._handle_vad(frame, vad_state) + + # DEPRECATED. 
+ if self._params.turn_analyzer: + await self._deprecated_run_turn_analyzer(frame, vad_state, previous_vad_state) + + if vad_state == VADState.SPEAKING: + await self.broadcast_frame(UserSpeakingFrame) + + # Push audio downstream if passthrough is set. + if self._params.audio_in_passthrough: + await self.push_frame(frame) + + self._audio_in_queue.task_done() + except asyncio.TimeoutError: + if self._user_speaking: + logger.warning( + "Forcing VAD user stopped speaking due to timeout receiving audio frame!" + ) + vad_state = VADState.QUIET + if self._params.turn_analyzer: + self._params.turn_analyzer.clear() + + if self._params.turn_analyzer: + await self._deprecated_handle_user_interruption(VADState.QUIET) + else: + await self.push_frame(VADUserStoppedSpeakingFrame()) + + # + # DEPRECATED. + # + # The functions below are deprecated and should be removed once the old + # interruption strategies and turn analyzer are removed. + # + + async def _deprecated_handle_bot_started_speaking(self, frame: BotStartedSpeakingFrame): + """Update bot speaking state when bot starts speaking.""" + self._bot_speaking = True + + async def _deprecated_handle_bot_stopped_speaking(self, frame: BotStoppedSpeakingFrame): + """Update bot speaking state when bot stops speaking.""" + self._bot_speaking = False + + async def _deprecated_handle_user_interruption( + self, vad_state: VADState, emulated: bool = False + ): """Handle user interruption events based on speaking state.""" if vad_state == VADState.SPEAKING: logger.debug("User started speaking") @@ -358,42 +494,9 @@ class BaseInputTransport(FrameProcessor): await self.broadcast_frame(UserStoppedSpeakingFrame, emulated=emulated) - # - # Handle bot speaking state - # - - async def _handle_bot_started_speaking(self, frame: BotStartedSpeakingFrame): - """Update bot speaking state when bot starts speaking.""" - self._bot_speaking = True - - async def _handle_bot_stopped_speaking(self, frame: BotStoppedSpeakingFrame): - """Update bot speaking 
state when bot stops speaking.""" - self._bot_speaking = False - - # - # Audio input - # - - def _create_audio_task(self): - """Create the audio processing task if audio input is enabled.""" - if not self._audio_task and self._params.audio_in_enabled: - self._audio_in_queue = asyncio.Queue() - self._audio_task = self.create_task(self._audio_task_handler()) - - async def _cancel_audio_task(self): - """Cancel and cleanup the audio processing task.""" - if self._audio_task: - await self.cancel_task(self._audio_task) - self._audio_task = None - - async def _vad_analyze(self, audio_frame: InputAudioRawFrame) -> VADState: - """Analyze audio frame for voice activity.""" - state = VADState.QUIET - if self.vad_analyzer: - state = await self.vad_analyzer.analyze_audio(audio_frame.audio) - return state - - async def _handle_vad(self, audio_frame: InputAudioRawFrame, vad_state: VADState) -> VADState: + async def _deprecated_handle_vad( + self, audio_frame: InputAudioRawFrame, vad_state: VADState + ) -> VADState: """Handle Voice Activity Detection results and generate appropriate frames.""" new_vad_state = await self._vad_analyze(audio_frame) if ( @@ -420,24 +523,30 @@ class BaseInputTransport(FrameProcessor): interruption_state = VADState.QUIET if interruption_state: - await self._handle_user_interruption(interruption_state) + await self._deprecated_handle_user_interruption(interruption_state) vad_state = new_vad_state return vad_state - async def _handle_end_of_turn(self): + async def _deprecated_handle_end_of_turn(self): """Handle end-of-turn analysis and generate prediction results.""" + # Don't use self._params.turn_analyzer so we can keep showing one + # deprecation warning. 
if self.turn_analyzer: - state, prediction = await self.turn_analyzer.analyze_end_of_turn() - await self._handle_prediction_result(prediction) - await self._handle_end_of_turn_complete(state) + state, prediction = await self._params.turn_analyzer.analyze_end_of_turn() + await self._deprecated_handle_prediction_result(prediction) + await self._deprecated_handle_end_of_turn_complete(state) - async def _handle_end_of_turn_complete(self, state: EndOfTurnState): + async def _deprecated_handle_end_of_turn_complete(self, state: EndOfTurnState): """Handle completion of end-of-turn analysis.""" if state == EndOfTurnState.COMPLETE: - await self._handle_user_interruption(VADState.QUIET) + await self._deprecated_handle_user_interruption(VADState.QUIET) - async def _run_turn_analyzer( + async def _deprecated_handle_prediction_result(self, result: MetricsData): + """Handle a prediction result event from the turn analyzer.""" + await self.push_frame(MetricsFrame(data=[result])) + + async def _deprecated_run_turn_analyzer( self, frame: InputAudioRawFrame, vad_state: VADState, previous_vad_state: VADState ): """Run turn analysis on audio frame and handle results.""" @@ -445,51 +554,7 @@ class BaseInputTransport(FrameProcessor): # If silence exceeds threshold, we are going to receive EndOfTurnState.COMPLETE end_of_turn_state = self._params.turn_analyzer.append_audio(frame.audio, is_speech) if end_of_turn_state == EndOfTurnState.COMPLETE: - await self._handle_end_of_turn_complete(end_of_turn_state) + await self._deprecated_handle_end_of_turn_complete(end_of_turn_state) # Otherwise we are going to trigger to check if the turn is completed based on the VAD elif vad_state == VADState.QUIET and vad_state != previous_vad_state: - await self._handle_end_of_turn() - - async def _audio_task_handler(self): - """Main audio processing task handler for VAD and turn analysis.""" - vad_state: VADState = VADState.QUIET - while True: - try: - frame: InputAudioRawFrame = await asyncio.wait_for( - 
self._audio_in_queue.get(), timeout=AUDIO_INPUT_TIMEOUT_SECS - ) - - # If an audio filter is available, run it before VAD. - if self._params.audio_in_filter: - frame.audio = await self._params.audio_in_filter.filter(frame.audio) - - # Check VAD and push event if necessary. We just care about - # changes from QUIET to SPEAKING and vice versa. - previous_vad_state = vad_state - if self._params.vad_analyzer: - vad_state = await self._handle_vad(frame, vad_state) - - if self._params.turn_analyzer: - await self._run_turn_analyzer(frame, vad_state, previous_vad_state) - - if vad_state == VADState.SPEAKING: - await self.broadcast_frame(UserSpeakingFrame) - - # Push audio downstream if passthrough is set. - if self._params.audio_in_passthrough: - await self.push_frame(frame) - - self._audio_in_queue.task_done() - except asyncio.TimeoutError: - if self._user_speaking: - logger.warning( - "Forcing user stopped speaking due to timeout receiving audio frame!" - ) - vad_state = VADState.QUIET - if self._params.turn_analyzer: - self._params.turn_analyzer.clear() - await self._handle_user_interruption(VADState.QUIET) - - async def _handle_prediction_result(self, result: MetricsData): - """Handle a prediction result event from the turn analyzer.""" - await self.push_frame(MetricsFrame(data=[result])) + await self._deprecated_handle_end_of_turn() diff --git a/src/pipecat/transports/base_transport.py b/src/pipecat/transports/base_transport.py index e1f7ecb50..27122866c 100644 --- a/src/pipecat/transports/base_transport.py +++ b/src/pipecat/transports/base_transport.py @@ -113,6 +113,10 @@ class TransportParams(BaseModel): vad_analyzer: Voice Activity Detection analyzer instance. turn_analyzer: Turn-taking analyzer instance for conversation management. + + .. deprecated:: 0.0.99 + The `turn_analyzer` parameter is deprecated, use speaking strategies instead. 
+ """ model_config = ConfigDict(arbitrary_types_allowed=True) From 76561da850d8bcfd4e2e529269020c9ba0540851 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Mon, 8 Dec 2025 18:22:02 -0800 Subject: [PATCH 08/30] TranscriptionBotTurnStartStrategy: improve by using interim transcriptions --- .../transcription_bot_turn_start_strategy.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/pipecat/turns/bot/transcription_bot_turn_start_strategy.py b/src/pipecat/turns/bot/transcription_bot_turn_start_strategy.py index 36541f765..b8cfd70e6 100644 --- a/src/pipecat/turns/bot/transcription_bot_turn_start_strategy.py +++ b/src/pipecat/turns/bot/transcription_bot_turn_start_strategy.py @@ -11,6 +11,7 @@ from typing import Optional from pipecat.frames.frames import ( Frame, + InterimTranscriptionFrame, TranscriptionFrame, VADUserStartedSpeakingFrame, VADUserStoppedSpeakingFrame, @@ -38,6 +39,7 @@ class TranscriptionBotTurnStartStrategy(BaseBotTurnStartStrategy): self._timeout = timeout self._text = "" self._vad_user_speaking = False + self._seen_interim_results = False self._event = asyncio.Event() self._task: Optional[asyncio.Task] = None @@ -46,6 +48,7 @@ class TranscriptionBotTurnStartStrategy(BaseBotTurnStartStrategy): await super().reset() self._text = "" self._vad_user_speaking = False + self._seen_interim_results = False self._event.clear() async def setup(self, task_manager: BaseTaskManager): @@ -78,6 +81,8 @@ class TranscriptionBotTurnStartStrategy(BaseBotTurnStartStrategy): await self._handle_vad_user_started_speaking(frame) elif isinstance(frame, VADUserStoppedSpeakingFrame): await self._handle_vad_user_stopped_speaking(frame) + elif isinstance(frame, InterimTranscriptionFrame): + await self._handle_interim_transcription(frame) elif isinstance(frame, TranscriptionFrame): await self._handle_transcription(frame) @@ -88,10 +93,17 @@ class TranscriptionBotTurnStartStrategy(BaseBotTurnStartStrategy): 
async def _handle_vad_user_stopped_speaking(self, _: VADUserStoppedSpeakingFrame): """Handle when the VAD indicates the user has stopped speaking.""" self._vad_user_speaking = False + await self._maybe_trigger_bot_turn_started() + + async def _handle_interim_transcription(self, frame: InterimTranscriptionFrame): + self._seen_interim_results = True async def _handle_transcription(self, frame: TranscriptionFrame): """Handle user transcription.""" self._text += frame.text + # We just got a final result, so let's reset interim results. + self._seen_interim_results = False + # Reset aggregation timer. self._event.set() async def _task_handler(self): @@ -107,5 +119,8 @@ class TranscriptionBotTurnStartStrategy(BaseBotTurnStartStrategy): await asyncio.wait_for(self._event.wait(), timeout=self._timeout) self._event.clear() except asyncio.TimeoutError: - if self._text and not self._vad_user_speaking: - await self.trigger_bot_turn_started() + await self._maybe_trigger_bot_turn_started() + + async def _maybe_trigger_bot_turn_started(self): + if not self._vad_user_speaking and not self._seen_interim_results and self._text: + await self.trigger_bot_turn_started() From 7e69288898b99d7e2b627572771d8a04250403ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Mon, 8 Dec 2025 18:22:58 -0800 Subject: [PATCH 09/30] tests: added bot turn start strategies unit tests --- pyproject.toml | 2 +- tests/test_bot_turn_start_strategy.py | 476 ++++++++++++++++++++++++++ tests/test_langchain.py | 14 +- uv.lock | 9 +- 4 files changed, 491 insertions(+), 10 deletions(-) create mode 100644 tests/test_bot_turn_start_strategy.py diff --git a/pyproject.toml b/pyproject.toml index 23ac8490c..b927bafe4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -125,7 +125,7 @@ dev = [ "pre-commit~=4.2.0", "pyright>=1.1.404,<1.2", "pytest~=8.4.1", - "pytest-asyncio~=1.1.0", + "pytest-asyncio~=1.3.0", "pytest-aiohttp==1.1.0", "ruff>=0.12.11,<1", "setuptools~=78.1.1", diff --git 
a/tests/test_bot_turn_start_strategy.py b/tests/test_bot_turn_start_strategy.py new file mode 100644 index 000000000..781688f21 --- /dev/null +++ b/tests/test_bot_turn_start_strategy.py @@ -0,0 +1,476 @@ +# +# Copyright (c) 2024-2025 Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import unittest + +from pipecat.frames.frames import ( + InterimTranscriptionFrame, + TranscriptionFrame, + VADUserStartedSpeakingFrame, + VADUserStoppedSpeakingFrame, +) +from pipecat.turns.bot.transcription_bot_turn_start_strategy import ( + TranscriptionBotTurnStartStrategy, +) +from pipecat.utils.asyncio.task_manager import TaskManager, TaskManagerParams + +AGGREGATION_TIMEOUT = 0.1 + + +class TestTranscriptionBotTurnStartStrategy(unittest.IsolatedAsyncioTestCase): + async def asyncSetUp(self) -> None: + self.task_manager = TaskManager() + self.task_manager.setup(TaskManagerParams(loop=asyncio.get_running_loop())) + + async def test_ste(self): + strategy = TranscriptionBotTurnStartStrategy() + + should_start = None + + @strategy.event_handler("on_bot_turn_started") + async def on_bot_turn_started(strategy): + nonlocal should_start + should_start = True + + # S + await strategy.process_frame(VADUserStartedSpeakingFrame()) + self.assertIsNone(should_start) + + # T + await strategy.process_frame(TranscriptionFrame(text="Hello!", user_id="cat", timestamp="")) + self.assertIsNone(should_start) + + # E + await strategy.process_frame(VADUserStoppedSpeakingFrame()) + + # Transcription comes in between user started/stopped and there are not + # interim, we just trigger bot speech. 
+ self.assertTrue(should_start) + + async def test_site(self): + strategy = TranscriptionBotTurnStartStrategy() + + should_start = None + + @strategy.event_handler("on_bot_turn_started") + async def on_bot_turn_started(strategy): + nonlocal should_start + should_start = True + + # S + await strategy.process_frame(VADUserStartedSpeakingFrame()) + self.assertIsNone(should_start) + + # I + await strategy.process_frame( + InterimTranscriptionFrame(text="Hello!", user_id="cat", timestamp="") + ) + self.assertIsNone(should_start) + + # T + await strategy.process_frame(TranscriptionFrame(text="Hello!", user_id="cat", timestamp="")) + self.assertIsNone(should_start) + + # E + await strategy.process_frame(VADUserStoppedSpeakingFrame()) + + # Transcription comes in between user started/stopped, so we trigger + # speech right away. + self.assertTrue(should_start) + + async def test_st1iest2e(self): + strategy = TranscriptionBotTurnStartStrategy() + + should_start = None + + @strategy.event_handler("on_bot_turn_started") + async def on_bot_turn_started(strategy): + nonlocal should_start + should_start = True + + # S + await strategy.process_frame(VADUserStartedSpeakingFrame()) + self.assertIsNone(should_start) + + # T1 + await strategy.process_frame(TranscriptionFrame(text="Hello!", user_id="cat", timestamp="")) + self.assertIsNone(should_start) + + # I + await strategy.process_frame( + InterimTranscriptionFrame(text="Hello!", user_id="cat", timestamp="") + ) + self.assertIsNone(should_start) + + # E + await strategy.process_frame(VADUserStoppedSpeakingFrame()) + self.assertIsNone(should_start) + + # S + await strategy.process_frame(VADUserStartedSpeakingFrame()) + self.assertIsNone(should_start) + + # T2 + await strategy.process_frame( + TranscriptionFrame(text="How are you?", user_id="cat", timestamp="") + ) + self.assertIsNone(should_start) + + # E + await strategy.process_frame(VADUserStoppedSpeakingFrame()) + + # There was an interim before the first user stopped 
speaking, then we + # got a transcription comes in between user started/stopped, so we + # trigger speech right away. + self.assertTrue(should_start) + + async def test_siet(self): + strategy = TranscriptionBotTurnStartStrategy(timeout=AGGREGATION_TIMEOUT) + await strategy.setup(self.task_manager) + + should_start = None + + @strategy.event_handler("on_bot_turn_started") + async def on_bot_turn_started(strategy): + nonlocal should_start + should_start = True + + # S + await strategy.process_frame(VADUserStartedSpeakingFrame()) + self.assertIsNone(should_start) + + # I + await strategy.process_frame( + InterimTranscriptionFrame(text="How", user_id="cat", timestamp="") + ) + self.assertIsNone(should_start) + + # E + await strategy.process_frame(VADUserStoppedSpeakingFrame()) + self.assertIsNone(should_start) + + # T + await strategy.process_frame(TranscriptionFrame(text="Hello!", user_id="cat", timestamp="")) + self.assertIsNone(should_start) + + # Transcription comes after user stopped speaking, we need to wait for + # at least the aggregation timeout. 
+ await asyncio.sleep(AGGREGATION_TIMEOUT + 0.1) + self.assertTrue(should_start) + + async def test_sieit(self): + strategy = TranscriptionBotTurnStartStrategy(timeout=AGGREGATION_TIMEOUT) + await strategy.setup(self.task_manager) + + should_start = None + + @strategy.event_handler("on_bot_turn_started") + async def on_bot_turn_started(strategy): + nonlocal should_start + should_start = True + + # S + await strategy.process_frame(VADUserStartedSpeakingFrame()) + self.assertIsNone(should_start) + + # I + await strategy.process_frame( + InterimTranscriptionFrame(text="How", user_id="cat", timestamp="") + ) + self.assertIsNone(should_start) + + # E + await strategy.process_frame(VADUserStoppedSpeakingFrame()) + self.assertIsNone(should_start) + + # I + await strategy.process_frame( + InterimTranscriptionFrame(text="are you?", user_id="cat", timestamp="") + ) + self.assertIsNone(should_start) + + # T + await strategy.process_frame( + TranscriptionFrame(text="How are you?", user_id="cat", timestamp="") + ) + self.assertIsNone(should_start) + + # Transcription comes after user stopped speaking, we need to wait for + # at least the aggregation timeout. 
+ await asyncio.sleep(AGGREGATION_TIMEOUT + 0.1) + self.assertTrue(should_start) + + async def test_set(self): + strategy = TranscriptionBotTurnStartStrategy(timeout=AGGREGATION_TIMEOUT) + await strategy.setup(self.task_manager) + + should_start = None + + @strategy.event_handler("on_bot_turn_started") + async def on_bot_turn_started(strategy): + nonlocal should_start + should_start = True + + # S + await strategy.process_frame(VADUserStartedSpeakingFrame()) + self.assertIsNone(should_start) + + # E + await strategy.process_frame(VADUserStoppedSpeakingFrame()) + self.assertIsNone(should_start) + + # T + await strategy.process_frame( + TranscriptionFrame(text="How are you?", user_id="cat", timestamp="") + ) + self.assertIsNone(should_start) + + # Transcription comes after user stopped speaking, we need to wait for + # at least the aggregation timeout. + await asyncio.sleep(AGGREGATION_TIMEOUT + 0.1) + self.assertTrue(should_start) + + async def test_seit(self): + strategy = TranscriptionBotTurnStartStrategy(timeout=AGGREGATION_TIMEOUT) + await strategy.setup(self.task_manager) + + should_start = None + + @strategy.event_handler("on_bot_turn_started") + async def on_bot_turn_started(strategy): + nonlocal should_start + should_start = True + + # S + await strategy.process_frame(VADUserStartedSpeakingFrame()) + self.assertIsNone(should_start) + + # E + await strategy.process_frame(VADUserStoppedSpeakingFrame()) + self.assertIsNone(should_start) + + # I + await strategy.process_frame( + InterimTranscriptionFrame(text="How", user_id="cat", timestamp="") + ) + self.assertIsNone(should_start) + + # T + await strategy.process_frame( + TranscriptionFrame(text="How are you?", user_id="cat", timestamp="") + ) + self.assertIsNone(should_start) + + # Transcription comes after user stopped speaking, we need to wait for + # at least the aggregation timeout. 
+ await asyncio.sleep(AGGREGATION_TIMEOUT + 0.1) + self.assertTrue(should_start) + + async def test_st1et2(self): + strategy = TranscriptionBotTurnStartStrategy(timeout=AGGREGATION_TIMEOUT) + await strategy.setup(self.task_manager) + + should_start = None + + @strategy.event_handler("on_bot_turn_started") + async def on_bot_turn_started(strategy): + nonlocal should_start + should_start = True + + # S + await strategy.process_frame(VADUserStartedSpeakingFrame()) + self.assertIsNone(should_start) + + # T1 + await strategy.process_frame(TranscriptionFrame(text="Hello!", user_id="cat", timestamp="")) + self.assertIsNone(should_start) + + # E + await strategy.process_frame(VADUserStoppedSpeakingFrame()) + + # Transcription comes between user start/stopped speaking, we need to + # trigger speech right away. + self.assertTrue(should_start) + should_start = None + + # T2 + await strategy.process_frame( + TranscriptionFrame(text="How are you?", user_id="cat", timestamp="") + ) + self.assertIsNone(should_start) + + # Transcription comes after user stopped speaking, we need to wait for + # at least the aggregation timeout. 
+ await asyncio.sleep(AGGREGATION_TIMEOUT + 0.1) + self.assertTrue(should_start) + + async def test_set1t2(self): + strategy = TranscriptionBotTurnStartStrategy(timeout=AGGREGATION_TIMEOUT) + await strategy.setup(self.task_manager) + + should_start = None + + @strategy.event_handler("on_bot_turn_started") + async def on_bot_turn_started(strategy): + nonlocal should_start + should_start = True + + # S + await strategy.process_frame(VADUserStartedSpeakingFrame()) + self.assertIsNone(should_start) + + # E + await strategy.process_frame(VADUserStoppedSpeakingFrame()) + self.assertIsNone(should_start) + + # T1 + await strategy.process_frame(TranscriptionFrame(text="Hello!", user_id="cat", timestamp="")) + self.assertIsNone(should_start) + + # T2 + await strategy.process_frame( + TranscriptionFrame(text="How are you?", user_id="cat", timestamp="") + ) + self.assertIsNone(should_start) + + # Transcription comes after user stopped speaking, we need to wait for + # at least the aggregation timeout. 
+ await asyncio.sleep(AGGREGATION_TIMEOUT + 0.1) + self.assertTrue(should_start) + + async def test_siet1it2(self): + strategy = TranscriptionBotTurnStartStrategy(timeout=AGGREGATION_TIMEOUT) + await strategy.setup(self.task_manager) + + should_start = None + + @strategy.event_handler("on_bot_turn_started") + async def on_bot_turn_started(strategy): + nonlocal should_start + should_start = True + + # S + await strategy.process_frame(VADUserStartedSpeakingFrame()) + self.assertIsNone(should_start) + + # I + await strategy.process_frame( + InterimTranscriptionFrame(text="Hello!", user_id="cat", timestamp="") + ) + + # E + await strategy.process_frame(VADUserStoppedSpeakingFrame()) + self.assertIsNone(should_start) + + # T1 + await strategy.process_frame(TranscriptionFrame(text="Hello!", user_id="cat", timestamp="")) + self.assertIsNone(should_start) + + # I + await strategy.process_frame( + InterimTranscriptionFrame(text="How", user_id="cat", timestamp="") + ) + self.assertIsNone(should_start) + + # T2 + await strategy.process_frame( + TranscriptionFrame(text="How are you?", user_id="cat", timestamp="") + ) + self.assertIsNone(should_start) + + # Transcription comes after user stopped speaking, we need to wait for + # at least the aggregation timeout. + await asyncio.sleep(AGGREGATION_TIMEOUT + 0.1) + self.assertTrue(should_start) + + async def test_t(self): + strategy = TranscriptionBotTurnStartStrategy(timeout=AGGREGATION_TIMEOUT) + await strategy.setup(self.task_manager) + + should_start = None + + @strategy.event_handler("on_bot_turn_started") + async def on_bot_turn_started(strategy): + nonlocal should_start + should_start = True + + # T + await strategy.process_frame(TranscriptionFrame(text="Hello!", user_id="cat", timestamp="")) + self.assertIsNone(should_start) + + # Transcription comes after user stopped speaking, we need to wait for + # at least the aggregation timeout. 
+ await asyncio.sleep(AGGREGATION_TIMEOUT + 0.1) + self.assertTrue(should_start) + + async def test_it(self): + strategy = TranscriptionBotTurnStartStrategy(timeout=AGGREGATION_TIMEOUT) + await strategy.setup(self.task_manager) + + should_start = None + + @strategy.event_handler("on_bot_turn_started") + async def on_bot_turn_started(strategy): + nonlocal should_start + should_start = True + + # I + await strategy.process_frame( + InterimTranscriptionFrame(text="Hello!", user_id="cat", timestamp="") + ) + + # T + await strategy.process_frame(TranscriptionFrame(text="Hello!", user_id="cat", timestamp="")) + self.assertIsNone(should_start) + + # Transcription comes after user stopped speaking, we need to wait for + # at least the aggregation timeout. + await asyncio.sleep(AGGREGATION_TIMEOUT + 0.1) + self.assertTrue(should_start) + + async def test_sie_delay_it(self): + strategy = TranscriptionBotTurnStartStrategy(timeout=AGGREGATION_TIMEOUT) + await strategy.setup(self.task_manager) + + should_start = None + + @strategy.event_handler("on_bot_turn_started") + async def on_bot_turn_started(strategy): + nonlocal should_start + should_start = True + + # S + await strategy.process_frame(VADUserStartedSpeakingFrame()) + self.assertIsNone(should_start) + + # I + await strategy.process_frame( + InterimTranscriptionFrame(text="Hello!", user_id="cat", timestamp="") + ) + + # E + await strategy.process_frame(VADUserStoppedSpeakingFrame()) + self.assertIsNone(should_start) + + # Delay + await asyncio.sleep(AGGREGATION_TIMEOUT + 0.1) + + # I + await strategy.process_frame( + InterimTranscriptionFrame(text="How", user_id="cat", timestamp="") + ) + + # T + await strategy.process_frame( + TranscriptionFrame(text="How are you?", user_id="cat", timestamp="") + ) + self.assertIsNone(should_start) + + # Transcription comes after user stopped speaking, we need to wait for + # at least the aggregation timeout. 
+ await asyncio.sleep(AGGREGATION_TIMEOUT + 0.1) + self.assertTrue(should_start) diff --git a/tests/test_langchain.py b/tests/test_langchain.py index 4e197b2aa..21d8428eb 100644 --- a/tests/test_langchain.py +++ b/tests/test_langchain.py @@ -10,6 +10,7 @@ from langchain.prompts import ChatPromptTemplate from langchain_core.language_models import FakeStreamingListLLM from pipecat.frames.frames import ( + InterruptionFrame, LLMContextAssistantTimestampFrame, LLMContextFrame, LLMFullResponseEndFrame, @@ -18,12 +19,11 @@ from pipecat.frames.frames import ( TranscriptionFrame, UserStartedSpeakingFrame, UserStoppedSpeakingFrame, + VADUserStartedSpeakingFrame, + VADUserStoppedSpeakingFrame, ) from pipecat.pipeline.pipeline import Pipeline from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import ( - LLMAssistantAggregatorParams, -) from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair from pipecat.processors.frame_processor import FrameProcessor from pipecat.processors.frameworks.langchain import LangchainProcessor @@ -72,13 +72,17 @@ class TestLangchain(unittest.IsolatedAsyncioTestCase): ) frames_to_send = [ - UserStartedSpeakingFrame(), + VADUserStartedSpeakingFrame(), TranscriptionFrame(text="Hi World", user_id="user", timestamp="now"), SleepFrame(), - UserStoppedSpeakingFrame(), + VADUserStoppedSpeakingFrame(), + SleepFrame(sleep=1.0), ] expected_down_frames = [ + VADUserStartedSpeakingFrame, UserStartedSpeakingFrame, + InterruptionFrame, + VADUserStoppedSpeakingFrame, UserStoppedSpeakingFrame, LLMContextFrame, LLMContextAssistantTimestampFrame, diff --git a/uv.lock b/uv.lock index cede216c2..f9b826937 100644 --- a/uv.lock +++ b/uv.lock @@ -4191,7 +4191,7 @@ dev = [ { name = "pyright", specifier = ">=1.1.404,<1.2" }, { name = "pytest", specifier = "~=8.4.1" }, { name = "pytest-aiohttp", specifier = "==1.1.0" }, - { name = "pytest-asyncio", specifier = "~=1.1.0" }, + { 
name = "pytest-asyncio", specifier = "~=1.3.0" }, { name = "python-dotenv", specifier = ">=1.0.1,<2.0.0" }, { name = "ruff", specifier = ">=0.12.11,<1" }, { name = "setuptools", specifier = "~=78.1.1" }, @@ -4828,15 +4828,16 @@ wheels = [ [[package]] name = "pytest-asyncio" -version = "1.1.1" +version = "1.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "backports-asyncio-runner", marker = "python_full_version < '3.11'" }, { name = "pytest" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8d/1e/2aa43805d4a320a9489d2b99f7877b69f9094c79aa0732159a1415dd6cd4/pytest_asyncio-1.1.1.tar.gz", hash = "sha256:b72d215c38e2c91dbb32f275e0b5be69602d7869910e109360e375129960a649", size = 46590, upload-time = "2025-09-12T06:36:20.834Z" } +sdist = { url = "https://files.pythonhosted.org/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087, upload-time = "2025-11-10T16:07:47.256Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/28/de/aba79e9ccdb51b5d0d65c67dd857bd78b00c64723df16b9fc800d8b94ce6/pytest_asyncio-1.1.1-py3-none-any.whl", hash = "sha256:726339d30fcfde24691f589445b9b67d058b311ac632b1d704e97f20f1d878da", size = 14719, upload-time = "2025-09-12T06:36:19.726Z" }, + { url = "https://files.pythonhosted.org/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075, upload-time = "2025-11-10T16:07:45.537Z" }, ] [[package]] From 3d62b9c203b6ddc8faf829e6a050d00404071af1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Mon, 8 Dec 2025 18:42:38 -0800 Subject: [PATCH 10/30] tests: added user turn start strategies unit tests 
--- tests/test_interruption_strategies.py | 2 +- tests/test_user_turn_start_strategy.py | 106 +++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 tests/test_user_turn_start_strategy.py diff --git a/tests/test_interruption_strategies.py b/tests/test_interruption_strategies.py index aa1bd7625..7700f77c8 100644 --- a/tests/test_interruption_strategies.py +++ b/tests/test_interruption_strategies.py @@ -9,7 +9,7 @@ import unittest from pipecat.audio.interruptions.min_words_interruption_strategy import MinWordsInterruptionStrategy -class TestInterruptionStrategy(unittest.IsolatedAsyncioTestCase): +class TestMinWordsInterruptionStrategy(unittest.IsolatedAsyncioTestCase): async def test_min_words(self): strategy = MinWordsInterruptionStrategy(min_words=2) await strategy.append_text("Hello") diff --git a/tests/test_user_turn_start_strategy.py b/tests/test_user_turn_start_strategy.py new file mode 100644 index 000000000..144dd15cc --- /dev/null +++ b/tests/test_user_turn_start_strategy.py @@ -0,0 +1,106 @@ +# +# Copyright (c) 2024-2025 Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import unittest + +from pipecat.frames.frames import ( + InterimTranscriptionFrame, + TranscriptionFrame, + VADUserStartedSpeakingFrame, + VADUserStoppedSpeakingFrame, +) +from pipecat.turns.user.min_words_user_turn_start_strategy import MinWordsUserTurnStartStrategy +from pipecat.turns.user.vad_user_turn_start_strategy import VADUserTurnStartStrategy + + +class TestMinWordsInterruptionStrategy(unittest.IsolatedAsyncioTestCase): + async def test_only_transcriptions(self): + strategy = MinWordsUserTurnStartStrategy(min_words=2) + + should_start = None + + @strategy.event_handler("on_user_turn_started") + async def on_user_turn_started(strategy): + nonlocal should_start + should_start = True + + await strategy.process_frame(TranscriptionFrame(text="Hello", user_id="cat", timestamp="")) + self.assertFalse(should_start) + + await 
strategy.process_frame( + TranscriptionFrame(text=" there!", user_id="cat", timestamp="") + ) + self.assertTrue(should_start) + + # Reset and check again + should_start = None + await strategy.reset() + + await strategy.process_frame(TranscriptionFrame(text="Hello!", user_id="cat", timestamp="")) + self.assertFalse(should_start) + + await strategy.process_frame( + TranscriptionFrame(text="How are you?", user_id="cat", timestamp="") + ) + self.assertTrue(should_start) + + async def test_only_interim_transcriptions(self): + strategy = MinWordsUserTurnStartStrategy(min_words=2) + + should_start = None + + @strategy.event_handler("on_user_turn_started") + async def on_user_turn_started(strategy): + nonlocal should_start + should_start = True + + await strategy.process_frame( + InterimTranscriptionFrame(text="Hello", user_id="cat", timestamp="") + ) + self.assertFalse(should_start) + + await strategy.process_frame( + InterimTranscriptionFrame(text="Hello there!", user_id="cat", timestamp="") + ) + self.assertTrue(should_start) + + async def test_all_transcriptions(self): + strategy = MinWordsUserTurnStartStrategy(min_words=2) + + should_start = None + + @strategy.event_handler("on_user_turn_started") + async def on_user_turn_started(strategy): + nonlocal should_start + should_start = True + + await strategy.process_frame( + InterimTranscriptionFrame(text="Hello", user_id="cat", timestamp="") + ) + self.assertFalse(should_start) + + await strategy.process_frame( + TranscriptionFrame(text="Hello there!", user_id="cat", timestamp="") + ) + self.assertTrue(should_start) + + +class TestVADUserTurnStartStrategy(unittest.IsolatedAsyncioTestCase): + async def test_vad_strategy(self): + strategy = VADUserTurnStartStrategy() + + should_start = None + + @strategy.event_handler("on_user_turn_started") + async def on_user_turn_started(strategy): + nonlocal should_start + should_start = True + + await strategy.process_frame(VADUserStoppedSpeakingFrame()) + 
self.assertFalse(should_start) + + await strategy.process_frame(VADUserStartedSpeakingFrame()) + self.assertTrue(should_start) From 962eb73cc4ae3f008920cbc80526c8240cf602ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Wed, 10 Dec 2025 19:01:56 -0800 Subject: [PATCH 11/30] frames: deprecated EmulateUserStartedSpeakingFrame/EmulateUserStoppedSpeakingFrame --- src/pipecat/frames/frames.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index 03ad7d41b..5afe95166 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -1136,6 +1136,9 @@ class EmulateUserStartedSpeakingFrame(SystemFrame): Emitted by internal processors upstream to emulate VAD behavior when a user starts speaking. + + .. deprecated:: 0.0.99 + This frame is deprecated and will be removed in a future version. """ pass @@ -1147,6 +1150,9 @@ class EmulateUserStoppedSpeakingFrame(SystemFrame): Emitted by internal processors upstream to emulate VAD behavior when a user stops speaking. + + .. deprecated:: 0.0.99 + This frame is deprecated and will be removed in a future version. 
""" pass From d33c72a8b0bd7b874f70bcd5099f6f3aa20193cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 11 Dec 2025 10:29:56 -0800 Subject: [PATCH 12/30] LLMUserAggregator: allow external user started/stopped speaking frames --- .../aggregators/llm_response_universal.py | 33 ++++++++++++++----- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index df3f707c3..a007c5ebd 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -15,6 +15,7 @@ import asyncio import json import warnings from abc import abstractmethod +from dataclasses import dataclass from typing import Any, Dict, List, Literal, Optional, Set from loguru import logger @@ -58,7 +59,6 @@ from pipecat.processors.aggregators.llm_context import ( ) from pipecat.processors.aggregators.llm_response import ( LLMAssistantAggregatorParams, - LLMUserAggregatorParams, ) from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.turns.bot.base_bot_turn_start_strategy import BaseBotTurnStartStrategy @@ -67,6 +67,21 @@ from pipecat.utils.string import TextPartForConcatenation, concatenate_aggregate from pipecat.utils.time import time_now_iso8601 +@dataclass +class LLMUserAggregatorParams: + """Parameters for configuring LLM user aggregation behavior. + + Parameters: + enable_user_speaking_frames: If True, the aggregator will emit frames + indicating when the user starts and stops speaking, as well as + interruption frames. This is enabled by default, but you may want + to disable it if another component (e.g., an STT service) is already + generating these frames. 
+ """ + + enable_user_speaking_frames: bool = True + + class LLMContextAggregator(FrameProcessor): """Base LLM aggregator that uses an LLMContext for conversation storage. @@ -370,15 +385,15 @@ class LLMUserAggregator(LLMContextAggregator): self._user_speaking = True - logger.debug(f"User started speaking (user turn start strategy: {strategy})") - # Reset all user turn start strategies to start fresh. if self.turn_start_strategies: for s in self.turn_start_strategies.user: await s.reset() - await self.push_frame(UserStartedSpeakingFrame()) - await self.push_frame(InterruptionFrame()) + if self._params.enable_user_speaking_frames: + logger.debug(f"User started speaking (user turn start strategy: {strategy})") + await self.push_frame(UserStartedSpeakingFrame()) + await self.push_frame(InterruptionFrame()) async def _trigger_bot_turn_start(self, strategy: BaseBotTurnStartStrategy): if not self._user_speaking: @@ -386,14 +401,16 @@ class LLMUserAggregator(LLMContextAggregator): self._user_speaking = False - logger.debug(f"User stopped speaking (bot turn start strategy: {strategy})") - # Reset all bot turn start strategies to start fresh. if self.turn_start_strategies: for s in self.turn_start_strategies.bot: await s.reset() - await self.push_frame(UserStoppedSpeakingFrame()) + if self._params.enable_user_speaking_frames: + logger.debug(f"User stopped speaking (bot turn start strategy: {strategy})") + await self.push_frame(UserStoppedSpeakingFrame()) + + # Always push context frame. 
await self.push_aggregation() From 7e6b0839b09bc731b6642122ea8591b2525222e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 11 Dec 2025 14:11:10 -0800 Subject: [PATCH 13/30] examples(foundational): don't use legacy LLMUserAggregatorParams --- .../foundational/07a-interruptible-speechmatics-vad.py | 8 +------- examples/foundational/07a-interruptible-speechmatics.py | 8 +------- examples/foundational/07l-interruptible-groq.py | 5 +---- examples/foundational/14f-function-calling-groq.py | 5 +---- examples/foundational/14s-function-calling-sambanova.py | 5 +---- src/pipecat/services/openai/realtime/llm.py | 1 - 6 files changed, 5 insertions(+), 27 deletions(-) diff --git a/examples/foundational/07a-interruptible-speechmatics-vad.py b/examples/foundational/07a-interruptible-speechmatics-vad.py index 1a58e724f..db3b95465 100644 --- a/examples/foundational/07a-interruptible-speechmatics-vad.py +++ b/examples/foundational/07a-interruptible-speechmatics-vad.py @@ -15,9 +15,6 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import ( - LLMUserAggregatorParams, -) from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport @@ -132,10 +129,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): ] context = LLMContext(messages) - context_aggregator = LLMContextAggregatorPair( - context, - user_params=LLMUserAggregatorParams(aggregation_timeout=0.005), - ) + context_aggregator = LLMContextAggregatorPair(context) pipeline = Pipeline( [ diff --git a/examples/foundational/07a-interruptible-speechmatics.py b/examples/foundational/07a-interruptible-speechmatics.py index 
558caff0a..9d93a4464 100644 --- a/examples/foundational/07a-interruptible-speechmatics.py +++ b/examples/foundational/07a-interruptible-speechmatics.py @@ -19,9 +19,6 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import ( - LLMUserAggregatorParams, -) from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport @@ -121,10 +118,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): ] context = LLMContext(messages) - context_aggregator = LLMContextAggregatorPair( - context, - user_params=LLMUserAggregatorParams(aggregation_timeout=0.005), - ) + context_aggregator = LLMContextAggregatorPair(context) pipeline = Pipeline( [ diff --git a/examples/foundational/07l-interruptible-groq.py b/examples/foundational/07l-interruptible-groq.py index 6938a1598..b184c9193 100644 --- a/examples/foundational/07l-interruptible-groq.py +++ b/examples/foundational/07l-interruptible-groq.py @@ -19,7 +19,6 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import LLMUserAggregatorParams from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport @@ -76,9 +75,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): ] context = LLMContext(messages) - context_aggregator = LLMContextAggregatorPair( - context, 
user_params=LLMUserAggregatorParams(aggregation_timeout=0.05) - ) + context_aggregator = LLMContextAggregatorPair(context) pipeline = Pipeline( [ diff --git a/examples/foundational/14f-function-calling-groq.py b/examples/foundational/14f-function-calling-groq.py index 53eb2de75..9a5ab27c0 100644 --- a/examples/foundational/14f-function-calling-groq.py +++ b/examples/foundational/14f-function-calling-groq.py @@ -21,7 +21,6 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import LLMUserAggregatorParams from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport @@ -109,9 +108,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): ] context = LLMContext(messages, tools) - context_aggregator = LLMContextAggregatorPair( - context, user_params=LLMUserAggregatorParams(aggregation_timeout=0.05) - ) + context_aggregator = LLMContextAggregatorPair(context) pipeline = Pipeline( [ diff --git a/examples/foundational/14s-function-calling-sambanova.py b/examples/foundational/14s-function-calling-sambanova.py index dae1531bc..3690fa9d7 100644 --- a/examples/foundational/14s-function-calling-sambanova.py +++ b/examples/foundational/14s-function-calling-sambanova.py @@ -21,7 +21,6 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import LLMUserAggregatorParams from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair from pipecat.runner.types 
import RunnerArguments from pipecat.runner.utils import create_transport @@ -115,9 +114,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): ] context = LLMContext(messages, tools) - context_aggregator = LLMContextAggregatorPair( - context, user_params=LLMUserAggregatorParams(aggregation_timeout=0.05) - ) + context_aggregator = LLMContextAggregatorPair(context) pipeline = Pipeline( [ diff --git a/src/pipecat/services/openai/realtime/llm.py b/src/pipecat/services/openai/realtime/llm.py index 15cde34c7..c7bc40bce 100644 --- a/src/pipecat/services/openai/realtime/llm.py +++ b/src/pipecat/services/openai/realtime/llm.py @@ -23,7 +23,6 @@ from pipecat.frames.frames import ( BotStoppedSpeakingFrame, CancelFrame, EndFrame, - ErrorFrame, Frame, InputAudioRawFrame, InterimTranscriptionFrame, From c4c4b4107b63860debe400af17253f143454db78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 11 Dec 2025 15:01:46 -0800 Subject: [PATCH 14/30] TurnAnalyzerBotTurnStartStrategy: broadcast SpeechControlParamsFrame --- src/pipecat/frames/frames.py | 4 ++-- .../aggregators/llm_response_universal.py | 14 +++++++++-- .../turns/bot/base_bot_turn_start_strategy.py | 22 +++++++++++++++++- .../turn_analyzer_bot_turn_start_strategy.py | 9 +++----- .../user/base_user_turn_start_strategy.py | 23 ++++++++++++++++++- 5 files changed, 60 insertions(+), 12 deletions(-) diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index 5afe95166..1b43f8ba3 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -30,7 +30,7 @@ from typing import ( from pipecat.adapters.schemas.tools_schema import ToolsSchema from pipecat.audio.dtmf.types import KeypadEntry as NewKeypadEntry from pipecat.audio.interruptions.base_interruption_strategy import BaseInterruptionStrategy -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams +from pipecat.audio.turn.base_turn_analyzer import BaseTurnParams 
from pipecat.audio.vad.vad_analyzer import VADParams from pipecat.metrics.metrics import MetricsData from pipecat.transcriptions.language import Language @@ -1550,7 +1550,7 @@ class SpeechControlParamsFrame(SystemFrame): """ vad_params: Optional[VADParams] = None - turn_params: Optional[SmartTurnParams] = None + turn_params: Optional[BaseTurnParams] = None # diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index a007c5ebd..241b5dbac 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -16,7 +16,7 @@ import json import warnings from abc import abstractmethod from dataclasses import dataclass -from typing import Any, Dict, List, Literal, Optional, Set +from typing import Any, Dict, List, Literal, Optional, Set, Type from loguru import logger @@ -310,11 +310,13 @@ class LLMUserAggregator(LLMContextAggregator): for s in self.turn_start_strategies.user: await s.setup(self.task_manager) s.add_event_handler("on_push_frame", self._on_push_frame) + s.add_event_handler("on_broadcast_frame", self._on_broadcast_frame) s.add_event_handler("on_user_turn_started", self._on_user_turn_started) for s in self.turn_start_strategies.bot: await s.setup(self.task_manager) s.add_event_handler("on_push_frame", self._on_push_frame) + s.add_event_handler("on_broadcast_frame", self._on_broadcast_frame) s.add_event_handler("on_bot_turn_started", self._on_bot_turn_started) async def _stop(self, frame: EndFrame): @@ -375,10 +377,18 @@ class LLMUserAggregator(LLMContextAggregator): self, strategy: BaseUserTurnStartStrategy | BaseBotTurnStartStrategy, frame: Frame, - direction: FrameDirection, + direction: FrameDirection = FrameDirection.DOWNSTREAM, ): await self.push_frame(frame, direction) + async def _on_broadcast_frame( + self, + strategy: BaseUserTurnStartStrategy | BaseBotTurnStartStrategy, + frame_cls: Type[Frame], 
+ **kwargs, + ): + await self.broadcast_frame(frame_cls, **kwargs) + async def _trigger_user_turn_start(self, strategy: BaseUserTurnStartStrategy): if self._user_speaking: return diff --git a/src/pipecat/turns/bot/base_bot_turn_start_strategy.py b/src/pipecat/turns/bot/base_bot_turn_start_strategy.py index bf804ba9c..3d5222169 100644 --- a/src/pipecat/turns/bot/base_bot_turn_start_strategy.py +++ b/src/pipecat/turns/bot/base_bot_turn_start_strategy.py @@ -6,9 +6,10 @@ """Base turn start strategy for determining when the bot should start speaking.""" -from typing import Optional +from typing import Optional, Type from pipecat.frames.frames import Frame +from pipecat.processors.frame_processor import FrameDirection from pipecat.utils.asyncio.task_manager import BaseTaskManager from pipecat.utils.base_object import BaseObject @@ -32,6 +33,7 @@ class BaseBotTurnStartStrategy(BaseObject): super().__init__(**kwargs) self._task_manager: Optional[BaseTaskManager] = None self._register_event_handler("on_push_frame", sync=True) + self._register_event_handler("on_broadcast_frame", sync=True) self._register_event_handler("on_bot_turn_started", sync=True) @property @@ -69,6 +71,24 @@ class BaseBotTurnStartStrategy(BaseObject): """ pass + async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM): + """Emit on_push_frame to push a frame using the user aggregator. + + Args: + frame: The frame to be pushed. + direction: What direction the frame should be pushed to. + """ + await self._call_event_handler("on_push_frame", frame, direction) + + async def broadcast_frame(self, frame_cls: Type[Frame], **kwargs): + """Emit on_broadcast_frame to broadcast a frame using the user aggregator. + + Args: + frame_cls: The class of the frame to be broadcasted. + **kwargs: Keyword arguments to be passed to the frame's constructor. 
+ """ + await self._call_event_handler("on_broadcast_frame", frame_cls, **kwargs) + async def trigger_bot_turn_started(self): """Trigger the `on_bot_turn_started` event.""" await self._call_event_handler("on_bot_turn_started") diff --git a/src/pipecat/turns/bot/turn_analyzer_bot_turn_start_strategy.py b/src/pipecat/turns/bot/turn_analyzer_bot_turn_start_strategy.py index 4e09338fb..5d2aaf020 100644 --- a/src/pipecat/turns/bot/turn_analyzer_bot_turn_start_strategy.py +++ b/src/pipecat/turns/bot/turn_analyzer_bot_turn_start_strategy.py @@ -15,13 +15,13 @@ from pipecat.frames.frames import ( InputAudioRawFrame, InterimTranscriptionFrame, MetricsFrame, + SpeechControlParamsFrame, StartFrame, TranscriptionFrame, VADUserStartedSpeakingFrame, VADUserStoppedSpeakingFrame, ) from pipecat.metrics.metrics import MetricsData -from pipecat.processors.frame_processor import FrameDirection from pipecat.turns.bot.base_bot_turn_start_strategy import BaseBotTurnStartStrategy from pipecat.utils.asyncio.task_manager import BaseTaskManager @@ -95,6 +95,7 @@ class TurnAnalyzerBotTurnStartStrategy(BaseBotTurnStartStrategy): async def _start(self, frame: StartFrame): """Process the start frame to configure the turn analyzer.""" self._turn_analyzer.set_sample_rate(frame.audio_in_sample_rate) + await self.broadcast_frame(SpeechControlParamsFrame, turn_params=self._turn_analyzer.params) async def _handle_input_audio(self, frame: InputAudioRawFrame): """Handle input audio to check if the turn is completed.""" @@ -129,11 +130,7 @@ class TurnAnalyzerBotTurnStartStrategy(BaseBotTurnStartStrategy): async def _handle_prediction_result(self, result: Optional[MetricsData]): """Handle a prediction result event from the turn analyzer.""" if result: - await self._call_event_handler( - "on_push_frame", - MetricsFrame(data=[result]), - FrameDirection.DOWNSTREAM, - ) + await self.push_frame(MetricsFrame(data=[result])) async def _task_handler(self): """Asynchronously monitor events and trigger bot turn 
when appropriate. diff --git a/src/pipecat/turns/user/base_user_turn_start_strategy.py b/src/pipecat/turns/user/base_user_turn_start_strategy.py index 216932f45..cb825320a 100644 --- a/src/pipecat/turns/user/base_user_turn_start_strategy.py +++ b/src/pipecat/turns/user/base_user_turn_start_strategy.py @@ -6,9 +6,10 @@ """Base turn start strategy for determining when the user starts speaking.""" -from typing import Optional +from typing import Optional, Type from pipecat.frames.frames import Frame +from pipecat.processors.frame_processor import FrameDirection from pipecat.utils.asyncio.task_manager import BaseTaskManager from pipecat.utils.base_object import BaseObject @@ -23,6 +24,7 @@ class BaseUserTurnStartStrategy(BaseObject): Events triggered by user turn start strategies: - `on_push_frame`: Indicates the strategy wants to push a frame. + - `on_broadcast_frame`: Indicates the strategy wants to broadcast a frame. - `on_user_turn_started`: Signals that a user turn has started. """ @@ -31,6 +33,7 @@ class BaseUserTurnStartStrategy(BaseObject): super().__init__(**kwargs) self._task_manager: Optional[BaseTaskManager] = None self._register_event_handler("on_push_frame", sync=True) + self._register_event_handler("on_broadcast_frame", sync=True) self._register_event_handler("on_user_turn_started", sync=True) @property @@ -68,6 +71,24 @@ class BaseUserTurnStartStrategy(BaseObject): """ pass + async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM): + """Emit on_push_frame to push a frame using the user aggregator. + + Args: + frame: The frame to be pushed. + direction: What direction the frame should be pushed to. + """ + await self._call_event_handler("on_push_frame", frame, direction) + + async def broadcast_frame(self, frame_cls: Type[Frame], **kwargs): + """Emit on_broadcast_frame to broadcast a frame using the user aggregator. + + Args: + frame_cls: The class of the frame to be broadcasted. 
+ **kwargs: Keyword arguments to be passed to the frame's constructor. + """ + await self._call_event_handler("on_broadcast_frame", frame_cls, **kwargs) + async def trigger_user_turn_started(self): """Trigger the `on_user_turn_started` event.""" await self._call_event_handler("on_user_turn_started") From 49ebe345999b8db72ada52606b80349bc33d7b61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 11 Dec 2025 15:19:05 -0800 Subject: [PATCH 15/30] BaseInputTransport: broadcast SpeechControlParamsFrame --- src/pipecat/transports/base_input.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/pipecat/transports/base_input.py b/src/pipecat/transports/base_input.py index c69328e9f..7e678a30c 100644 --- a/src/pipecat/transports/base_input.py +++ b/src/pipecat/transports/base_input.py @@ -223,8 +223,9 @@ class BaseInputTransport(FrameProcessor): vad_params = self._params.vad_analyzer.params if self._params.vad_analyzer else None turn_params = self._params.turn_analyzer.params if self._params.turn_analyzer else None - speech_frame = SpeechControlParamsFrame(vad_params=vad_params, turn_params=turn_params) - await self.push_frame(speech_frame) + await self.broadcast_frame( + SpeechControlParamsFrame, vad_params=vad_params, turn_params=turn_params + ) # Start audio filter. 
if self._params.audio_in_filter: @@ -342,13 +343,13 @@ class BaseInputTransport(FrameProcessor): elif isinstance(frame, VADParamsUpdateFrame): if self.vad_analyzer: self.vad_analyzer.set_params(frame.params) - speech_frame = SpeechControlParamsFrame( + await self.broadcast_frame( + SpeechControlParamsFrame, vad_params=frame.params, turn_params=self._params.turn_analyzer.params if self._params.turn_analyzer else None, ) - await self.push_frame(speech_frame) elif isinstance(frame, FilterUpdateSettingsFrame) and self._params.audio_in_filter: await self._params.audio_in_filter.process_frame(frame) # Other frames From 222ccbb471193fd4bc404378ecca8f72d02f2d08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Sun, 14 Dec 2025 12:27:53 -0800 Subject: [PATCH 16/30] SegmentedSTTService: use VAD user started/stopped speaking frames --- src/pipecat/services/stt_service.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/pipecat/services/stt_service.py b/src/pipecat/services/stt_service.py index f81848415..6c5741bfe 100644 --- a/src/pipecat/services/stt_service.py +++ b/src/pipecat/services/stt_service.py @@ -20,8 +20,8 @@ from pipecat.frames.frames import ( StartFrame, STTMuteFrame, STTUpdateSettingsFrame, - UserStartedSpeakingFrame, - UserStoppedSpeakingFrame, + VADUserStartedSpeakingFrame, + VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.ai_service import AIService @@ -252,20 +252,15 @@ class SegmentedSTTService(STTService): """Process frames, handling VAD events and audio segmentation.""" await super().process_frame(frame, direction) - if isinstance(frame, UserStartedSpeakingFrame): + if isinstance(frame, VADUserStartedSpeakingFrame): await self._handle_user_started_speaking(frame) - elif isinstance(frame, UserStoppedSpeakingFrame): + elif isinstance(frame, VADUserStoppedSpeakingFrame): await self._handle_user_stopped_speaking(frame) - 
async def _handle_user_started_speaking(self, frame: UserStartedSpeakingFrame): - if frame.emulated: - return + async def _handle_user_started_speaking(self, frame: VADUserStartedSpeakingFrame): self._user_speaking = True - async def _handle_user_stopped_speaking(self, frame: UserStoppedSpeakingFrame): - if frame.emulated: - return - + async def _handle_user_stopped_speaking(self, frame: VADUserStoppedSpeakingFrame): self._user_speaking = False content = io.BytesIO() From a388ff927c5d92573df446baf6793e96bfefcd14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Sun, 14 Dec 2025 12:29:25 -0800 Subject: [PATCH 17/30] LLMUserAggregator: broadcast user started/stopped speaking frames --- .../processors/aggregators/llm_response_universal.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index 241b5dbac..145dec0ec 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -402,8 +402,9 @@ class LLMUserAggregator(LLMContextAggregator): if self._params.enable_user_speaking_frames: logger.debug(f"User started speaking (user turn start strategy: {strategy})") - await self.push_frame(UserStartedSpeakingFrame()) - await self.push_frame(InterruptionFrame()) + # TODO(aleix): These frames should really come from the top of the pipeline. 
+ await self.broadcast_frame(UserStartedSpeakingFrame, emulated=strategy is None) + await self.broadcast_frame(InterruptionFrame) async def _trigger_bot_turn_start(self, strategy: BaseBotTurnStartStrategy): if not self._user_speaking: @@ -418,7 +419,8 @@ class LLMUserAggregator(LLMContextAggregator): if self._params.enable_user_speaking_frames: logger.debug(f"User stopped speaking (bot turn start strategy: {strategy})") - await self.push_frame(UserStoppedSpeakingFrame()) + # TODO(aleix): This frame should really come from the top of the pipeline. + await self.broadcast_frame(UserStoppedSpeakingFrame, emulated=strategy is None) # Always push context frame. await self.push_aggregation() From 2cdf20722796a2b6d0da7e09cf97e57081c45de1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Mon, 15 Dec 2025 14:16:30 -0800 Subject: [PATCH 18/30] turns: add TranscriptionUserTurnStartStrategy --- .../transcription_user_turn_start_strategy.py | 51 +++++++++++++++++++ tests/test_user_turn_start_strategy.py | 25 +++++++++ 2 files changed, 76 insertions(+) create mode 100644 src/pipecat/turns/user/transcription_user_turn_start_strategy.py diff --git a/src/pipecat/turns/user/transcription_user_turn_start_strategy.py b/src/pipecat/turns/user/transcription_user_turn_start_strategy.py new file mode 100644 index 000000000..6a5a7c907 --- /dev/null +++ b/src/pipecat/turns/user/transcription_user_turn_start_strategy.py @@ -0,0 +1,51 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""User turn start strategy based on transcriptions.""" + +from pipecat.frames.frames import BotStartedSpeakingFrame, Frame, TranscriptionFrame +from pipecat.turns.user.base_user_turn_start_strategy import BaseUserTurnStartStrategy + + +class TranscriptionUserTurnStartStrategy(BaseUserTurnStartStrategy): + """User turn start strategy based on transcriptions. 
+ + This strategy signals the start of a user turn when a transcription is + received while the bot is speaking. It is useful as a fallback in scenarios + where VAD-based detection fails (for example, when the user speaks very + softly) but the STT service still produces transcriptions. + + """ + + def __init__(self): + """Initialize the transcription user turn start strategy.""" + super().__init__() + self._bot_speaking = False + + async def reset(self): + """Reset the strategy to its initial state.""" + await super().reset() + self._bot_speaking = False + + async def process_frame(self, frame: Frame): + """Process an incoming frame to detect the start of a user turn. + + Args: + frame: The frame to be processed. + """ + await super().process_frame(frame) + + if isinstance(frame, BotStartedSpeakingFrame): + await self._handle_bot_started_speaking(frame) + elif isinstance(frame, TranscriptionFrame): + await self._handle_transcription(frame) + + async def _handle_bot_started_speaking(self, _: BotStartedSpeakingFrame): + self._bot_speaking = True + + async def _handle_transcription(self, _: TranscriptionFrame): + if self._bot_speaking: + await self.trigger_user_turn_started() diff --git a/tests/test_user_turn_start_strategy.py b/tests/test_user_turn_start_strategy.py index 144dd15cc..9402c1793 100644 --- a/tests/test_user_turn_start_strategy.py +++ b/tests/test_user_turn_start_strategy.py @@ -7,12 +7,16 @@ import unittest from pipecat.frames.frames import ( + BotStartedSpeakingFrame, InterimTranscriptionFrame, TranscriptionFrame, VADUserStartedSpeakingFrame, VADUserStoppedSpeakingFrame, ) from pipecat.turns.user.min_words_user_turn_start_strategy import MinWordsUserTurnStartStrategy +from pipecat.turns.user.transcription_user_turn_start_strategy import ( + TranscriptionUserTurnStartStrategy, +) from pipecat.turns.user.vad_user_turn_start_strategy import VADUserTurnStartStrategy @@ -104,3 +108,24 @@ class TestVADUserTurnStartStrategy(unittest.IsolatedAsyncioTestCase): await 
strategy.process_frame(VADUserStartedSpeakingFrame()) self.assertTrue(should_start) + + +class TestTranscriptionUserTurnStartStrategy(unittest.IsolatedAsyncioTestCase): + async def test_transcription_strategy(self): + strategy = TranscriptionUserTurnStartStrategy() + + should_start = None + + @strategy.event_handler("on_user_turn_started") + async def on_user_turn_started(strategy): + nonlocal should_start + should_start = True + + await strategy.process_frame(TranscriptionFrame(text="Hello!", user_id="", timestamp="now")) + self.assertFalse(should_start) + + await strategy.process_frame(BotStartedSpeakingFrame()) + self.assertFalse(should_start) + + await strategy.process_frame(TranscriptionFrame(text="Hello!", user_id="", timestamp="now")) + self.assertTrue(should_start) From dff6b5402a41aefe7fdc102c9a96fd87d04037f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Mon, 15 Dec 2025 14:16:47 -0800 Subject: [PATCH 19/30] LLMUserAggregator: use TranscriptionUserTurnStartStrategy for emulated interruptions --- src/pipecat/pipeline/task.py | 5 ++++- .../processors/aggregators/llm_response_universal.py | 12 ------------ 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/src/pipecat/pipeline/task.py b/src/pipecat/pipeline/task.py index 388723acd..56cf7d2a1 100644 --- a/src/pipecat/pipeline/task.py +++ b/src/pipecat/pipeline/task.py @@ -51,6 +51,9 @@ from pipecat.turns.bot.transcription_bot_turn_start_strategy import ( TranscriptionBotTurnStartStrategy, ) from pipecat.turns.turn_start_strategies import TurnStartStrategies +from pipecat.turns.user.transcription_user_turn_start_strategy import ( + TranscriptionUserTurnStartStrategy, +) from pipecat.turns.user.vad_user_turn_start_strategy import VADUserTurnStartStrategy from pipecat.utils.asyncio.task_manager import BaseTaskManager, TaskManager, TaskManagerParams from pipecat.utils.tracing.setup import is_tracing_available @@ -293,7 +296,7 @@ class PipelineTask(BasePipelineTask): # 
Initialize default user and bot turn start strategies. if not self._params.turn_start_strategies: self._params.turn_start_strategies = TurnStartStrategies( - user=[VADUserTurnStartStrategy()], + user=[VADUserTurnStartStrategy(), TranscriptionUserTurnStartStrategy()], bot=[TranscriptionBotTurnStartStrategy()], ) diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index 145dec0ec..0b56fccaf 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -231,8 +231,6 @@ class LLMUserAggregator(LLMContextAggregator): super().__init__(context=context, role="user", **kwargs) self._params = params or LLMUserAggregatorParams() - self._user_speaking = False - async def cleanup(self): """Clean up processor resources.""" await super().cleanup() @@ -390,11 +388,6 @@ class LLMUserAggregator(LLMContextAggregator): await self.broadcast_frame(frame_cls, **kwargs) async def _trigger_user_turn_start(self, strategy: BaseUserTurnStartStrategy): - if self._user_speaking: - return - - self._user_speaking = True - # Reset all user turn start strategies to start fresh. if self.turn_start_strategies: for s in self.turn_start_strategies.user: @@ -407,11 +400,6 @@ class LLMUserAggregator(LLMContextAggregator): await self.broadcast_frame(InterruptionFrame) async def _trigger_bot_turn_start(self, strategy: BaseBotTurnStartStrategy): - if not self._user_speaking: - return - - self._user_speaking = False - # Reset all bot turn start strategies to start fresh. 
if self.turn_start_strategies: for s in self.turn_start_strategies.bot: From a9cca0b9349442492677b486cf757237a080b4cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 18 Dec 2025 10:58:40 -0800 Subject: [PATCH 20/30] LLMAssistantAggregatorParams: copy to llm_response_universal --- .../22d-natural-conversation-gemini-audio.py | 1 - .../26e-gemini-live-google-search.py | 1 - .../foundational/26f-gemini-live-files-api.py | 1 - .../26g-gemini-live-groundingMetadata.py | 1 - .../26h-gemini-live-vertex-function-calling.py | 1 - .../26i-gemini-live-graceful-end.py | 1 - examples/foundational/46-video-processing.py | 1 - .../aggregators/llm_response_universal.py | 17 ++++++++++++++--- src/pipecat/services/llm_service.py | 1 - 9 files changed, 14 insertions(+), 11 deletions(-) diff --git a/examples/foundational/22d-natural-conversation-gemini-audio.py b/examples/foundational/22d-natural-conversation-gemini-audio.py index 9654dd154..dad9bbfd8 100644 --- a/examples/foundational/22d-natural-conversation-gemini-audio.py +++ b/examples/foundational/22d-natural-conversation-gemini-audio.py @@ -22,7 +22,6 @@ from pipecat.frames.frames import ( InterruptionFrame, LLMContextFrame, LLMFullResponseStartFrame, - LLMRunFrame, StartFrame, SystemFrame, TextFrame, diff --git a/examples/foundational/26e-gemini-live-google-search.py b/examples/foundational/26e-gemini-live-google-search.py index f5a3fd675..bc14cf713 100644 --- a/examples/foundational/26e-gemini-live-google-search.py +++ b/examples/foundational/26e-gemini-live-google-search.py @@ -17,7 +17,6 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair 
from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport diff --git a/examples/foundational/26f-gemini-live-files-api.py b/examples/foundational/26f-gemini-live-files-api.py index bb9791a05..75fda1d17 100644 --- a/examples/foundational/26f-gemini-live-files-api.py +++ b/examples/foundational/26f-gemini-live-files-api.py @@ -17,7 +17,6 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport diff --git a/examples/foundational/26g-gemini-live-groundingMetadata.py b/examples/foundational/26g-gemini-live-groundingMetadata.py index c05f63dad..6626e9b39 100644 --- a/examples/foundational/26g-gemini-live-groundingMetadata.py +++ b/examples/foundational/26g-gemini-live-groundingMetadata.py @@ -11,7 +11,6 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.runner.types import RunnerArguments diff --git a/examples/foundational/26h-gemini-live-vertex-function-calling.py b/examples/foundational/26h-gemini-live-vertex-function-calling.py index 4d1534829..af5ebe7e5 100644 --- a/examples/foundational/26h-gemini-live-vertex-function-calling.py +++ 
b/examples/foundational/26h-gemini-live-vertex-function-calling.py @@ -20,7 +20,6 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport diff --git a/examples/foundational/26i-gemini-live-graceful-end.py b/examples/foundational/26i-gemini-live-graceful-end.py index 9d3628777..9bb306e52 100644 --- a/examples/foundational/26i-gemini-live-graceful-end.py +++ b/examples/foundational/26i-gemini-live-graceful-end.py @@ -19,7 +19,6 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair from pipecat.processors.frame_processor import FrameDirection from pipecat.runner.types import RunnerArguments diff --git a/examples/foundational/46-video-processing.py b/examples/foundational/46-video-processing.py index 36075d343..41e03c719 100644 --- a/examples/foundational/46-video-processing.py +++ b/examples/foundational/46-video-processing.py @@ -16,7 +16,6 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams from 
pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.processors.frameworks.rtvi import RTVIObserver, RTVIProcessor diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index 0b56fccaf..56b5e9e22 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -57,9 +57,6 @@ from pipecat.processors.aggregators.llm_context import ( LLMSpecificMessage, NotGiven, ) -from pipecat.processors.aggregators.llm_response import ( - LLMAssistantAggregatorParams, -) from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.turns.bot.base_bot_turn_start_strategy import BaseBotTurnStartStrategy from pipecat.turns.user.base_user_turn_start_strategy import BaseUserTurnStartStrategy @@ -82,6 +79,20 @@ class LLMUserAggregatorParams: enable_user_speaking_frames: bool = True +@dataclass +class LLMAssistantAggregatorParams: + """Parameters for configuring LLM assistant aggregation behavior. + + Parameters: + expect_stripped_words: Whether to expect and handle stripped words + in text frames by adding spaces between tokens. This parameter is + ignored when used with the newer LLMAssistantAggregator, which + handles word spacing automatically. + """ + + expect_stripped_words: bool = True + + class LLMContextAggregator(FrameProcessor): """Base LLM aggregator that uses an LLMContext for conversation storage. 
diff --git a/src/pipecat/services/llm_service.py b/src/pipecat/services/llm_service.py index d2165fe38..70164ebd8 100644 --- a/src/pipecat/services/llm_service.py +++ b/src/pipecat/services/llm_service.py @@ -47,7 +47,6 @@ from pipecat.frames.frames import ( ) from pipecat.processors.aggregators.llm_context import ( LLMContext, - LLMContextMessage, LLMSpecificMessage, ) from pipecat.processors.aggregators.llm_response import ( From 169fc0b568ec6ddd56ae511b5ea087fcb56b1deb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 18 Dec 2025 16:01:53 -0800 Subject: [PATCH 21/30] frames: deprecate emulated field in UserStartedSpeakingFrame/UserStoppedSpeakingFrame --- src/pipecat/frames/frames.py | 21 ++++++++++++------- .../aggregators/llm_response_universal.py | 4 ++-- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index 1b43f8ba3..6d2ece36e 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -1093,15 +1093,17 @@ class StartInterruptionFrame(InterruptionFrame): @dataclass class UserStartedSpeakingFrame(SystemFrame): - """Frame indicating user has started speaking. + """Frame indicating that the user turn has started. - Emitted by VAD to indicate that a user has started speaking. This can be - used for interruptions or other times when detecting that someone is - speaking is more important than knowing what they're saying (as you will - get with a TranscriptionFrame). + Emitted when the user turn starts, which usually means that some + transcriptions are already available. Parameters: emulated: Whether this event was emulated rather than detected by VAD. + + .. deprecated:: 0.0.99 + This field is deprecated and will be removed in a future version. 
+ """ emulated: bool = False @@ -1109,12 +1111,17 @@ class UserStartedSpeakingFrame(SystemFrame): @dataclass class UserStoppedSpeakingFrame(SystemFrame): - """Frame indicating user has stopped speaking. + """Frame indicating that the user turn has ended. - Emitted by the VAD to indicate that a user stopped speaking. + Emitted when the user turn ends. This usually coincides with the start of + the bot turn. Parameters: emulated: Whether this event was emulated rather than detected by VAD. + + .. deprecated:: 0.0.99 + This field is deprecated and will be removed in a future version. + """ emulated: bool = False diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index 56b5e9e22..9c687492c 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -407,7 +407,7 @@ class LLMUserAggregator(LLMContextAggregator): if self._params.enable_user_speaking_frames: logger.debug(f"User started speaking (user turn start strategy: {strategy})") # TODO(aleix): These frames should really come from the top of the pipeline. - await self.broadcast_frame(UserStartedSpeakingFrame, emulated=strategy is None) + await self.broadcast_frame(UserStartedSpeakingFrame) await self.broadcast_frame(InterruptionFrame) async def _trigger_bot_turn_start(self, strategy: BaseBotTurnStartStrategy): @@ -419,7 +419,7 @@ class LLMUserAggregator(LLMContextAggregator): if self._params.enable_user_speaking_frames: logger.debug(f"User stopped speaking (bot turn start strategy: {strategy})") # TODO(aleix): This frame should really come from the top of the pipeline. - await self.broadcast_frame(UserStoppedSpeakingFrame, emulated=strategy is None) + await self.broadcast_frame(UserStoppedSpeakingFrame) # Always push context frame. 
await self.push_aggregation() From 83263a30afb871336476bbd416f7ed92db294433 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 18 Dec 2025 16:09:47 -0800 Subject: [PATCH 22/30] llm_response: deprecate old LLMUserAggregatorParams and LLMAssistantAggregatorParams --- src/pipecat/processors/aggregators/llm_response.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/pipecat/processors/aggregators/llm_response.py b/src/pipecat/processors/aggregators/llm_response.py index 5df75cf29..dcb6f76f9 100644 --- a/src/pipecat/processors/aggregators/llm_response.py +++ b/src/pipecat/processors/aggregators/llm_response.py @@ -67,6 +67,10 @@ from pipecat.utils.time import time_now_iso8601 class LLMUserAggregatorParams: """Parameters for configuring LLM user aggregation behavior. + .. deprecated:: 0.0.99 + This class is deprecated; use the new universal `LLMContext` and + `LLMContextAggregatorPair` instead. + Parameters: aggregation_timeout: Maximum time in seconds to wait for additional transcription content before pushing aggregated result. This @@ -77,17 +81,26 @@ class LLMUserAggregatorParams: enable_emulated_vad_interruptions: When True, allows emulated VAD events to interrupt the bot when it's speaking. When False, emulated speech is ignored while the bot is speaking. + enable_user_speaking_frames: [DO NOT USE] added for temporary backwards + compatibility. + """ aggregation_timeout: float = 0.5 turn_emulated_vad_timeout: float = 0.8 enable_emulated_vad_interruptions: bool = False + # Added for backwards compatibility. + enable_user_speaking_frames: bool = True @dataclass class LLMAssistantAggregatorParams: """Parameters for configuring LLM assistant aggregation behavior. + .. deprecated:: 0.0.99 + This class is deprecated; use the new universal `LLMContext` and + `LLMContextAggregatorPair` instead. + Parameters: expect_stripped_words: Whether to expect and handle stripped words in text frames by adding spaces between tokens.
This parameter is From d22e1f18bb28cef6c57de0e99a3b85d5efc17867 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 18 Dec 2025 22:17:41 -0800 Subject: [PATCH 23/30] examples: update with new user and bot turn start strategies --- .../04-transports-small-webrtc.py | 7 +++++-- examples/foundational/04a-transports-daily.py | 7 +++++-- .../foundational/04b-transports-livekit.py | 7 +++++-- .../foundational/06-listen-and-respond.py | 9 ++++---- examples/foundational/06a-image-sync.py | 8 ++++--- .../07-interruptible-cartesia-http.py | 9 ++++---- examples/foundational/07-interruptible.py | 9 ++++---- .../07a-interruptible-speechmatics.py | 9 ++++---- .../foundational/07aa-interruptible-soniox.py | 9 ++++---- .../07ab-interruptible-inworld-http.py | 9 ++++---- .../07ab-interruptible-inworld.py | 9 ++++---- .../07ac-interruptible-asyncai-http.py | 9 ++++---- .../07ac-interruptible-asyncai.py | 9 ++++---- .../07ad-interruptible-aicoustics.py | 10 ++++----- .../foundational/07ae-interruptible-hume.py | 9 ++++---- .../07af-interruptible-gradium.py | 9 ++++---- .../07b-interruptible-langchain.py | 9 ++++---- .../07c-interruptible-deepgram-flux.py | 5 ++++- .../07c-interruptible-deepgram-http.py | 9 ++++---- .../07c-interruptible-deepgram-sagemaker.py | 9 ++++---- .../07c-interruptible-deepgram.py | 9 ++++---- .../07d-interruptible-elevenlabs-http.py | 9 ++++---- .../07d-interruptible-elevenlabs.py | 9 ++++---- .../07e-interruptible-playht-http.py | 9 ++++---- .../foundational/07e-interruptible-playht.py | 9 ++++---- .../07f-interruptible-azure-http.py | 9 ++++---- .../foundational/07f-interruptible-azure.py | 9 ++++---- .../foundational/07g-interruptible-openai.py | 9 ++++---- .../07h-interruptible-openpipe.py | 9 ++++---- .../foundational/07i-interruptible-xtts.py | 9 ++++---- .../foundational/07j-interruptible-gladia.py | 9 ++++---- .../foundational/07k-interruptible-lmnt.py | 8 ++++--- .../foundational/07l-interruptible-groq.py | 9 
++++---- .../foundational/07m-interruptible-aws.py | 9 ++++---- .../07n-interruptible-gemini-image.py | 8 ++++--- .../foundational/07n-interruptible-gemini.py | 9 ++++---- .../07n-interruptible-google-http.py | 11 +++++----- .../foundational/07n-interruptible-google.py | 9 ++++---- .../07o-interruptible-assemblyai.py | 9 ++++---- .../07p-interruptible-krisp-viva.py | 9 ++++---- .../foundational/07p-interruptible-krisp.py | 9 ++++---- .../07q-interruptible-rime-http.py | 9 ++++---- .../foundational/07q-interruptible-rime.py | 9 ++++---- .../foundational/07r-interruptible-nvidia.py | 9 ++++---- .../07s-interruptible-google-audio-in.py | 9 ++++---- .../foundational/07t-interruptible-fish.py | 9 ++++---- .../07v-interruptible-neuphonic-http.py | 9 ++++---- .../07v-interruptible-neuphonic.py | 9 ++++---- .../foundational/07w-interruptible-fal.py | 9 ++++---- .../foundational/07x-interruptible-local.py | 9 ++++---- .../foundational/07y-interruptible-minimax.py | 9 ++++---- .../07z-interruptible-sarvam-http.py | 9 ++++---- .../foundational/07z-interruptible-sarvam.py | 14 ++++++------- .../foundational/08-custom-frame-processor.py | 10 ++++----- examples/foundational/10-wake-phrase.py | 9 ++++---- examples/foundational/11-sound-effects.py | 13 +++++++----- .../foundational/12-describe-image-openai.py | 8 ++++--- .../12a-describe-image-anthropic.py | 8 ++++--- .../foundational/12b-describe-image-aws.py | 8 ++++--- .../12c-describe-image-gemini-flash.py | 8 ++++--- .../12d-describe-image-moondream.py | 8 ++++--- examples/foundational/14-function-calling.py | 9 ++++---- .../14a-function-calling-anthropic.py | 9 ++++---- .../14c-function-calling-together.py | 9 ++++---- .../14d-function-calling-anthropic-video.py | 8 ++++--- .../14d-function-calling-aws-video.py | 8 ++++--- ...14d-function-calling-gemini-flash-video.py | 8 ++++--- .../14d-function-calling-moondream-video.py | 12 +++++++---- .../14d-function-calling-openai-video.py | 8 ++++--- 
.../14e-function-calling-google.py | 8 ++++--- .../foundational/14f-function-calling-groq.py | 9 ++++---- .../foundational/14g-function-calling-grok.py | 9 ++++---- .../14h-function-calling-azure.py | 9 ++++---- .../14i-function-calling-fireworks.py | 11 +++++----- .../14j-function-calling-nvidia.py | 9 ++++---- .../14k-function-calling-cerebras.py | 9 ++++---- .../14l-function-calling-deepseek.py | 9 ++++---- .../14m-function-calling-openrouter.py | 9 ++++---- .../14n-function-calling-perplexity.py | 9 ++++---- ...o-function-calling-gemini-openai-format.py | 9 ++++---- .../14p-function-calling-gemini-vertex-ai.py | 9 ++++---- .../foundational/14q-function-calling-qwen.py | 9 ++++---- .../foundational/14r-function-calling-aws.py | 9 ++++---- .../14s-function-calling-sambanova.py | 9 ++++---- .../14t-function-calling-direct.py | 9 ++++---- .../14u-function-calling-ollama.py | 10 ++++----- .../14v-function-calling-openai.py | 9 ++++---- .../14w-function-calling-mistral.py | 11 +++++----- .../14x-function-calling-openpipe.py | 9 ++++---- examples/foundational/15-switch-voices.py | 9 ++++---- examples/foundational/15a-switch-languages.py | 9 ++++---- .../16-gpu-container-local-bot.py | 9 ++++---- examples/foundational/17-detect-user-idle.py | 9 ++++---- .../20a-persistent-context-openai.py | 9 ++++---- .../20c-persistent-context-anthropic.py | 9 ++++---- .../20d-persistent-context-gemini.py | 10 +++++---- examples/foundational/21-tavus-transport.py | 7 +++++-- .../foundational/21a-tavus-video-service.py | 8 ++++--- .../foundational/23-bot-background-sound.py | 9 ++++---- examples/foundational/24-stt-mute-filter.py | 9 ++++---- examples/foundational/27-simli-layer.py | 8 ++++--- .../28-transcription-processor.py | 9 ++++---- .../foundational/29-turn-tracking-observer.py | 9 ++++---- examples/foundational/30-observer.py | 9 ++++---- .../32-gemini-grounding-metadata.py | 9 ++++---- examples/foundational/33-gemini-rag.py | 9 ++++---- 
examples/foundational/34-audio-recording.py | 9 ++++---- .../35-pattern-pair-voice-switching.py | 9 ++++---- .../foundational/36-user-email-gathering.py | 9 ++++---- examples/foundational/37-mem0.py | 9 ++++---- examples/foundational/38-smart-turn-fal.py | 21 +++++++++++-------- .../38a-smart-turn-local-coreml.py | 21 ++++++++++--------- examples/foundational/38b-smart-turn-local.py | 11 +++++----- examples/foundational/39-mcp-stdio.py | 8 ++++--- .../foundational/39a-mcp-streamable-http.py | 9 ++++---- .../39b-mcp-streamable-http-gemini-live.py | 11 +++++----- examples/foundational/39c-multiple-mcp.py | 8 ++++--- .../foundational/42-interruption-config.py | 13 ++++++------ examples/foundational/43-heygen-transport.py | 7 +++++-- .../foundational/43a-heygen-video-service.py | 8 ++++--- .../foundational/44-voicemail-detection.py | 9 ++++---- .../45-before-and-after-events.py | 9 ++++---- examples/foundational/47-sentry-metrics.py | 9 ++++---- examples/foundational/48-service-switcher.py | 9 ++++---- .../foundational/49a-thinking-anthropic.py | 9 ++++---- examples/quickstart/bot.py | 7 +++++-- 126 files changed, 657 insertions(+), 497 deletions(-) diff --git a/examples/foundational/04-transports-small-webrtc.py b/examples/foundational/04-transports-small-webrtc.py index 9a622e200..84dba963f 100644 --- a/examples/foundational/04-transports-small-webrtc.py +++ b/examples/foundational/04-transports-small-webrtc.py @@ -17,7 +17,6 @@ from fastapi.responses import RedirectResponse from loguru import logger from pipecat_ai_small_webrtc_prebuilt.frontend import SmallWebRTCPrebuiltUI -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -33,6 +32,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import 
TransportParams from pipecat.transports.smallwebrtc.connection import IceServer, SmallWebRTCConnection from pipecat.transports.smallwebrtc.transport import SmallWebRTCTransport +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -61,7 +62,6 @@ async def run_example(webrtc_connection: SmallWebRTCConnection): audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), ) @@ -101,6 +101,9 @@ async def run_example(webrtc_connection: SmallWebRTCConnection): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), ) diff --git a/examples/foundational/04a-transports-daily.py b/examples/foundational/04a-transports-daily.py index 7e5e432ff..851217f47 100644 --- a/examples/foundational/04a-transports-daily.py +++ b/examples/foundational/04a-transports-daily.py @@ -12,7 +12,6 @@ import aiohttp from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -26,6 +25,8 @@ from pipecat.runner.daily import configure from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.daily.transport import DailyParams, DailyTransport +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ 
-46,7 +47,6 @@ async def main(): audio_out_enabled=True, transcription_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), ) @@ -83,6 +83,9 @@ async def main(): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), ) diff --git a/examples/foundational/04b-transports-livekit.py b/examples/foundational/04b-transports-livekit.py index d2941e2b7..3aff36bc2 100644 --- a/examples/foundational/04b-transports-livekit.py +++ b/examples/foundational/04b-transports-livekit.py @@ -12,7 +12,6 @@ import sys from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -33,6 +32,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.livekit.transport import LiveKitParams, LiveKitTransport +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -51,7 +52,6 @@ async def main(): audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), ) @@ -94,6 +94,9 @@ async def main(): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + 
bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), ) diff --git a/examples/foundational/06-listen-and-respond.py b/examples/foundational/06-listen-and-respond.py index 5b1eed538..3b898e921 100644 --- a/examples/foundational/06-listen-and-respond.py +++ b/examples/foundational/06-listen-and-respond.py @@ -9,7 +9,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -34,6 +33,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -66,19 +67,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -125,6 +123,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, 
enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/06a-image-sync.py b/examples/foundational/06a-image-sync.py index e0edf1b36..95d315572 100644 --- a/examples/foundational/06a-image-sync.py +++ b/examples/foundational/06a-image-sync.py @@ -10,7 +10,6 @@ from dotenv import load_dotenv from loguru import logger from PIL import Image -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -34,6 +33,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -84,7 +85,6 @@ transport_params = { video_out_width=1024, video_out_height=1024, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, @@ -93,7 +93,6 @@ transport_params = { video_out_width=1024, video_out_height=1024, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -143,6 +142,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + 
bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07-interruptible-cartesia-http.py b/examples/foundational/07-interruptible-cartesia-http.py index 299332459..ea7f0d1db 100644 --- a/examples/foundational/07-interruptible-cartesia-http.py +++ b/examples/foundational/07-interruptible-cartesia-http.py @@ -9,7 +9,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -27,6 +26,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -39,19 +40,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -95,6 +93,9 @@ async def run_bot(transport: BaseTransport, 
runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07-interruptible.py b/examples/foundational/07-interruptible.py index d6699b390..648ed4e33 100644 --- a/examples/foundational/07-interruptible.py +++ b/examples/foundational/07-interruptible.py @@ -9,7 +9,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -27,6 +26,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -38,19 +39,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - 
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -94,6 +92,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07a-interruptible-speechmatics.py b/examples/foundational/07a-interruptible-speechmatics.py index 9d93a4464..981a69fdc 100644 --- a/examples/foundational/07a-interruptible-speechmatics.py +++ b/examples/foundational/07a-interruptible-speechmatics.py @@ -10,7 +10,6 @@ import aiohttp from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -30,6 +29,8 @@ from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -41,19 +42,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), 
), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -137,6 +135,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07aa-interruptible-soniox.py b/examples/foundational/07aa-interruptible-soniox.py index b211837a9..850b2bba7 100644 --- a/examples/foundational/07aa-interruptible-soniox.py +++ b/examples/foundational/07aa-interruptible-soniox.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.soniox.stt import SonioxSTTService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -36,19 +37,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, 
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -93,6 +91,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07ab-interruptible-inworld-http.py b/examples/foundational/07ab-interruptible-inworld-http.py index 2d7717839..a5c151813 100644 --- a/examples/foundational/07ab-interruptible-inworld-http.py +++ b/examples/foundational/07ab-interruptible-inworld-http.py @@ -10,7 +10,6 @@ import aiohttp from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -31,6 +30,8 @@ from pipecat.transports.base_output import BaseOutputTransport from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -39,19 +40,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, 
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -103,6 +101,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), observers=[ RTVIObserver(rtvi), diff --git a/examples/foundational/07ab-interruptible-inworld.py b/examples/foundational/07ab-interruptible-inworld.py index ee7f15ef9..cc7c82e80 100644 --- a/examples/foundational/07ab-interruptible-inworld.py +++ b/examples/foundational/07ab-interruptible-inworld.py @@ -9,7 +9,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -30,6 +29,8 @@ from pipecat.transports.base_output import BaseOutputTransport from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies 
load_dotenv(override=True) @@ -39,19 +40,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -100,6 +98,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), observers=[ RTVIObserver(rtvi), diff --git a/examples/foundational/07ac-interruptible-asyncai-http.py b/examples/foundational/07ac-interruptible-asyncai-http.py index 237104bd2..25281724c 100644 --- a/examples/foundational/07ac-interruptible-asyncai-http.py +++ b/examples/foundational/07ac-interruptible-asyncai-http.py @@ -11,7 +11,6 @@ import aiohttp from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -29,6 +28,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from 
pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -41,19 +42,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -100,6 +98,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07ac-interruptible-asyncai.py b/examples/foundational/07ac-interruptible-asyncai.py index 35d3e89e9..efc86b775 100644 --- a/examples/foundational/07ac-interruptible-asyncai.py +++ b/examples/foundational/07ac-interruptible-asyncai.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams 
from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,19 +41,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -96,6 +94,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07ad-interruptible-aicoustics.py b/examples/foundational/07ad-interruptible-aicoustics.py index edcd9498f..3b9e213d2 100644 --- a/examples/foundational/07ad-interruptible-aicoustics.py +++ b/examples/foundational/07ad-interruptible-aicoustics.py @@ -13,9 +13,7 @@ from dotenv import load_dotenv from loguru import logger from pipecat.audio.filters.aic_filter import AICFilter -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 -from pipecat.audio.vad.vad_analyzer import VADParams from 
pipecat.frames.frames import LLMRunFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner @@ -31,6 +29,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -60,7 +60,6 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=aic.create_vad_analyzer(lookback_buffer_size=6.0, sensitivity=6.0), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), audio_in_filter=aic, ) )(_create_aic_filter()), @@ -69,7 +68,6 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=aic.create_vad_analyzer(lookback_buffer_size=6.0, sensitivity=6.0), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), audio_in_filter=aic, ) )(_create_aic_filter()), @@ -78,7 +76,6 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=aic.create_vad_analyzer(lookback_buffer_size=6.0, sensitivity=6.0), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), audio_in_filter=aic, ) )(_create_aic_filter()), @@ -125,6 +122,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07ae-interruptible-hume.py b/examples/foundational/07ae-interruptible-hume.py index c5de34c85..6d7d61e15 100644 --- 
a/examples/foundational/07ae-interruptible-hume.py +++ b/examples/foundational/07ae-interruptible-hume.py @@ -9,7 +9,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -32,6 +31,8 @@ from pipecat.transports.base_output import BaseOutputTransport from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -44,19 +45,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -105,6 +103,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): enable_metrics=True, enable_usage_metrics=True, audio_out_sample_rate=HUME_SAMPLE_RATE, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, 
observers=[ diff --git a/examples/foundational/07af-interruptible-gradium.py b/examples/foundational/07af-interruptible-gradium.py index 9ad3bcebb..18333760d 100644 --- a/examples/foundational/07af-interruptible-gradium.py +++ b/examples/foundational/07af-interruptible-gradium.py @@ -9,7 +9,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -27,6 +26,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -38,19 +39,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -94,6 +92,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + 
bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07b-interruptible-langchain.py b/examples/foundational/07b-interruptible-langchain.py index 3567d5098..bbb6812b5 100644 --- a/examples/foundational/07b-interruptible-langchain.py +++ b/examples/foundational/07b-interruptible-langchain.py @@ -15,7 +15,6 @@ from langchain_core.runnables.history import RunnableWithMessageHistory from langchain_openai import ChatOpenAI from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -33,6 +32,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -54,19 +55,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), 
} @@ -121,6 +119,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07c-interruptible-deepgram-flux.py b/examples/foundational/07c-interruptible-deepgram-flux.py index 62579c2c5..a47140ea2 100644 --- a/examples/foundational/07c-interruptible-deepgram-flux.py +++ b/examples/foundational/07c-interruptible-deepgram-flux.py @@ -17,6 +17,7 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_response_universal import ( LLMContext, LLMContextAggregatorPair, + LLMUserAggregatorParams, ) from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport @@ -69,7 +70,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): ] context = LLMContext(messages) - context_aggregator = LLMContextAggregatorPair(context) + context_aggregator = LLMContextAggregatorPair( + context, user_params=LLMUserAggregatorParams(enable_user_speaking_frames=False) + ) pipeline = Pipeline( [ diff --git a/examples/foundational/07c-interruptible-deepgram-http.py b/examples/foundational/07c-interruptible-deepgram-http.py index 03375c27a..5d9d85bb5 100644 --- a/examples/foundational/07c-interruptible-deepgram-http.py +++ b/examples/foundational/07c-interruptible-deepgram-http.py @@ -11,7 +11,6 @@ import aiohttp from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -29,6 +28,8 @@ from 
pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -41,19 +42,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -99,6 +97,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07c-interruptible-deepgram-sagemaker.py b/examples/foundational/07c-interruptible-deepgram-sagemaker.py index db230a8ba..cedb8c4fe 100644 --- a/examples/foundational/07c-interruptible-deepgram-sagemaker.py +++ b/examples/foundational/07c-interruptible-deepgram-sagemaker.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from 
pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.deepgram.tts import DeepgramTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,19 +41,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -104,6 +102,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07c-interruptible-deepgram.py b/examples/foundational/07c-interruptible-deepgram.py index e73711733..4ebbb84ab 100644 --- a/examples/foundational/07c-interruptible-deepgram.py +++ b/examples/foundational/07c-interruptible-deepgram.py @@ -10,7 +10,6 @@ import 
os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,19 +41,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -93,6 +91,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07d-interruptible-elevenlabs-http.py b/examples/foundational/07d-interruptible-elevenlabs-http.py index 7d3d5c0f8..40f6ab5ed 
100644 --- a/examples/foundational/07d-interruptible-elevenlabs-http.py +++ b/examples/foundational/07d-interruptible-elevenlabs-http.py @@ -11,7 +11,6 @@ import aiohttp from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -29,6 +28,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -41,19 +42,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -103,6 +101,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), 
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07d-interruptible-elevenlabs.py b/examples/foundational/07d-interruptible-elevenlabs.py index e7025948c..f466b4902 100644 --- a/examples/foundational/07d-interruptible-elevenlabs.py +++ b/examples/foundational/07d-interruptible-elevenlabs.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,19 +41,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -96,6 +94,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, 
enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07e-interruptible-playht-http.py b/examples/foundational/07e-interruptible-playht-http.py index 5d6b7ceec..36055cbcd 100644 --- a/examples/foundational/07e-interruptible-playht-http.py +++ b/examples/foundational/07e-interruptible-playht-http.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.playht.tts import PlayHTHttpTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -39,19 +40,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - 
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -96,6 +94,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07e-interruptible-playht.py b/examples/foundational/07e-interruptible-playht.py index f4a23772b..971bf240e 100644 --- a/examples/foundational/07e-interruptible-playht.py +++ b/examples/foundational/07e-interruptible-playht.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -29,6 +28,8 @@ from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,19 +41,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: 
TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -98,6 +96,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07f-interruptible-azure-http.py b/examples/foundational/07f-interruptible-azure-http.py index 0ce19bf48..4ea361096 100644 --- a/examples/foundational/07f-interruptible-azure-http.py +++ b/examples/foundational/07f-interruptible-azure-http.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.azure.tts import AzureHttpTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -39,19 +40,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, 
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -102,6 +100,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07f-interruptible-azure.py b/examples/foundational/07f-interruptible-azure.py index 6d4cf5793..7e601de50 100644 --- a/examples/foundational/07f-interruptible-azure.py +++ b/examples/foundational/07f-interruptible-azure.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.azure.tts import AzureTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -39,19 +40,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - 
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -102,6 +100,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07g-interruptible-openai.py b/examples/foundational/07g-interruptible-openai.py index aa44e5a42..22415c090 100644 --- a/examples/foundational/07g-interruptible-openai.py +++ b/examples/foundational/07g-interruptible-openai.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.openai.tts import OpenAITTSService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -39,19 +40,16 @@ 
transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -97,6 +95,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): audio_out_sample_rate=24000, enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07h-interruptible-openpipe.py b/examples/foundational/07h-interruptible-openpipe.py index 60565d1f9..6a66a8425 100644 --- a/examples/foundational/07h-interruptible-openpipe.py +++ b/examples/foundational/07h-interruptible-openpipe.py @@ -11,7 +11,6 @@ import time from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -29,6 +28,8 @@ from pipecat.services.openpipe.llm import OpenPipeLLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import 
TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,19 +41,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -101,6 +99,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07i-interruptible-xtts.py b/examples/foundational/07i-interruptible-xtts.py index 9ad73c7d2..ad4682dac 100644 --- a/examples/foundational/07i-interruptible-xtts.py +++ b/examples/foundational/07i-interruptible-xtts.py @@ -11,7 +11,6 @@ import aiohttp from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -29,6 +28,8 @@ from pipecat.services.xtts.tts import XTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from 
pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,19 +41,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -99,6 +97,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07j-interruptible-gladia.py b/examples/foundational/07j-interruptible-gladia.py index 079967857..622cc9969 100644 --- a/examples/foundational/07j-interruptible-gladia.py +++ b/examples/foundational/07j-interruptible-gladia.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -30,6 +29,8 @@ from pipecat.transcriptions.language import Language from 
pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -41,19 +42,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -105,6 +103,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07k-interruptible-lmnt.py b/examples/foundational/07k-interruptible-lmnt.py index 2d57b28a5..1be0c39d4 100644 --- a/examples/foundational/07k-interruptible-lmnt.py +++ b/examples/foundational/07k-interruptible-lmnt.py @@ -28,6 +28,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from 
pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -39,19 +41,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -92,6 +91,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07l-interruptible-groq.py b/examples/foundational/07l-interruptible-groq.py index b184c9193..3c2eb3c20 100644 --- a/examples/foundational/07l-interruptible-groq.py +++ b/examples/foundational/07l-interruptible-groq.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.groq.tts import GroqTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams from 
pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -39,19 +40,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -94,6 +92,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07m-interruptible-aws.py b/examples/foundational/07m-interruptible-aws.py index b53f1f367..617270956 100644 --- a/examples/foundational/07m-interruptible-aws.py +++ b/examples/foundational/07m-interruptible-aws.py @@ -8,7 +8,6 @@ from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -26,6 +25,8 @@ from pipecat.services.aws.tts import 
AWSPollyTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -37,19 +38,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -98,6 +96,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07n-interruptible-gemini-image.py b/examples/foundational/07n-interruptible-gemini-image.py index 62af2cf46..f80e3736d 100644 --- a/examples/foundational/07n-interruptible-gemini-image.py +++ b/examples/foundational/07n-interruptible-gemini-image.py @@ -25,7 +25,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from 
pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -44,6 +43,8 @@ from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -58,7 +59,6 @@ transport_params = { video_out_width=1024, video_out_height=1024, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, @@ -67,7 +67,6 @@ transport_params = { video_out_width=1024, video_out_height=1024, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -119,6 +118,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07n-interruptible-gemini.py b/examples/foundational/07n-interruptible-gemini.py index ed689a28e..696008f91 100644 --- a/examples/foundational/07n-interruptible-gemini.py +++ b/examples/foundational/07n-interruptible-gemini.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import 
SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -29,6 +28,8 @@ from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,19 +41,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -125,6 +123,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07n-interruptible-google-http.py b/examples/foundational/07n-interruptible-google-http.py index 4a0382990..0ecaaa829 100644 --- a/examples/foundational/07n-interruptible-google-http.py +++ b/examples/foundational/07n-interruptible-google-http.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn 
import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -24,11 +23,13 @@ from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.google.llm import GoogleLLMService from pipecat.services.google.stt import GoogleSTTService -from pipecat.services.google.tts import GoogleHttpTTSService, GoogleTTSService +from pipecat.services.google.tts import GoogleHttpTTSService from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,19 +41,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -108,6 +106,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + 
bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07n-interruptible-google.py b/examples/foundational/07n-interruptible-google.py index 28b61c151..da46c819f 100644 --- a/examples/foundational/07n-interruptible-google.py +++ b/examples/foundational/07n-interruptible-google.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -29,6 +28,8 @@ from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,19 +41,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -108,6 +106,9 @@ async def run_bot(transport: BaseTransport, runner_args: 
RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07o-interruptible-assemblyai.py b/examples/foundational/07o-interruptible-assemblyai.py index 2a76dbad8..61962ec11 100644 --- a/examples/foundational/07o-interruptible-assemblyai.py +++ b/examples/foundational/07o-interruptible-assemblyai.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,19 +41,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - 
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -98,6 +96,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07p-interruptible-krisp-viva.py b/examples/foundational/07p-interruptible-krisp-viva.py index c8b374dac..9e15015da 100644 --- a/examples/foundational/07p-interruptible-krisp-viva.py +++ b/examples/foundational/07p-interruptible-krisp-viva.py @@ -11,7 +11,6 @@ from dotenv import load_dotenv from loguru import logger from pipecat.audio.filters.krisp_viva_filter import KrispVivaFilter -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -29,6 +28,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,21 +41,18 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), audio_in_filter=KrispVivaFilter(), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, 
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), audio_in_filter=KrispVivaFilter(), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), audio_in_filter=KrispVivaFilter(), ), } @@ -96,6 +94,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07p-interruptible-krisp.py b/examples/foundational/07p-interruptible-krisp.py index 5dfaaff44..3534e2a61 100644 --- a/examples/foundational/07p-interruptible-krisp.py +++ b/examples/foundational/07p-interruptible-krisp.py @@ -11,7 +11,6 @@ from dotenv import load_dotenv from loguru import logger from pipecat.audio.filters.krisp_filter import KrispFilter -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -29,6 +28,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,21 +41,18 @@ transport_params = { audio_in_enabled=True, 
audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), audio_in_filter=KrispFilter(), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), audio_in_filter=KrispFilter(), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), audio_in_filter=KrispFilter(), ), } @@ -96,6 +94,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07q-interruptible-rime-http.py b/examples/foundational/07q-interruptible-rime-http.py index d5fa8b710..3230691c5 100644 --- a/examples/foundational/07q-interruptible-rime-http.py +++ b/examples/foundational/07q-interruptible-rime-http.py @@ -11,7 +11,6 @@ import aiohttp from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -29,6 +28,8 @@ from pipecat.services.rime.tts import RimeHttpTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from 
pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -41,19 +42,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -101,6 +99,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07q-interruptible-rime.py b/examples/foundational/07q-interruptible-rime.py index e66222db9..5dd492d81 100644 --- a/examples/foundational/07q-interruptible-rime.py +++ b/examples/foundational/07q-interruptible-rime.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.rime.tts import RimeTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams from 
pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -39,19 +40,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -95,6 +93,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07r-interruptible-nvidia.py b/examples/foundational/07r-interruptible-nvidia.py index bba99ea4c..155474045 100644 --- a/examples/foundational/07r-interruptible-nvidia.py +++ b/examples/foundational/07r-interruptible-nvidia.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from 
pipecat.services.nvidia.tts import NvidiaTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -39,19 +40,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -94,6 +92,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07s-interruptible-google-audio-in.py b/examples/foundational/07s-interruptible-google-audio-in.py index 90bff6062..b7596579f 100644 --- a/examples/foundational/07s-interruptible-google-audio-in.py +++ b/examples/foundational/07s-interruptible-google-audio-in.py @@ -12,7 +12,6 @@ from dotenv import load_dotenv from google.genai.types import Content, Part from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from 
pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -42,6 +41,8 @@ from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -201,19 +202,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -272,6 +270,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07t-interruptible-fish.py b/examples/foundational/07t-interruptible-fish.py index 53ee61dea..3f499773b 100644 --- a/examples/foundational/07t-interruptible-fish.py +++ b/examples/foundational/07t-interruptible-fish.py @@ -10,7 +10,6 @@ import os from dotenv import 
load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,19 +41,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -96,6 +94,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07v-interruptible-neuphonic-http.py b/examples/foundational/07v-interruptible-neuphonic-http.py index 6de428d8b..6a62fd8a1 100644 --- 
a/examples/foundational/07v-interruptible-neuphonic-http.py +++ b/examples/foundational/07v-interruptible-neuphonic-http.py @@ -11,7 +11,6 @@ import aiohttp from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -29,6 +28,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -41,19 +42,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -100,6 +98,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff 
--git a/examples/foundational/07v-interruptible-neuphonic.py b/examples/foundational/07v-interruptible-neuphonic.py index b0a49104e..33c0b4ea6 100644 --- a/examples/foundational/07v-interruptible-neuphonic.py +++ b/examples/foundational/07v-interruptible-neuphonic.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -39,19 +40,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -95,6 +93,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + 
bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07w-interruptible-fal.py b/examples/foundational/07w-interruptible-fal.py index 6836f439e..18ec22774 100644 --- a/examples/foundational/07w-interruptible-fal.py +++ b/examples/foundational/07w-interruptible-fal.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,19 +41,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -98,6 +96,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): 
params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07x-interruptible-local.py b/examples/foundational/07x-interruptible-local.py index ce9c7597d..00da2927c 100644 --- a/examples/foundational/07x-interruptible-local.py +++ b/examples/foundational/07x-interruptible-local.py @@ -11,7 +11,6 @@ import sys from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -21,12 +20,12 @@ from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair -from pipecat.runner.types import RunnerArguments -from pipecat.runner.utils import create_transport from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.local.audio import LocalAudioTransport, LocalAudioTransportParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,7 +39,6 @@ async def main(): audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ) ) @@ -80,6 +78,9 @@ async def main(): 
params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), ) diff --git a/examples/foundational/07y-interruptible-minimax.py b/examples/foundational/07y-interruptible-minimax.py index 6a5d33887..c20c98b67 100644 --- a/examples/foundational/07y-interruptible-minimax.py +++ b/examples/foundational/07y-interruptible-minimax.py @@ -11,7 +11,6 @@ import aiohttp from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -30,6 +29,8 @@ from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -42,19 +43,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -102,6 +100,9 @@ 
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07z-interruptible-sarvam-http.py b/examples/foundational/07z-interruptible-sarvam-http.py index 73239167d..5e918cae6 100644 --- a/examples/foundational/07z-interruptible-sarvam-http.py +++ b/examples/foundational/07z-interruptible-sarvam-http.py @@ -11,7 +11,6 @@ import aiohttp from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -30,6 +29,8 @@ from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -42,19 +43,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, 
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -104,6 +102,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/07z-interruptible-sarvam.py b/examples/foundational/07z-interruptible-sarvam.py index 41418049f..f7d691af8 100644 --- a/examples/foundational/07z-interruptible-sarvam.py +++ b/examples/foundational/07z-interruptible-sarvam.py @@ -4,19 +4,15 @@ # SPDX-License-Identifier: BSD 2-Clause License # - -import asyncio import os -import aiohttp from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams -from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.frames.frames import LLMRunFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask @@ -30,6 +26,8 @@ from pipecat.services.sarvam.tts import SarvamTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -42,19 +40,16 @@ transport_params = 
{ audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -101,6 +96,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), ) diff --git a/examples/foundational/08-custom-frame-processor.py b/examples/foundational/08-custom-frame-processor.py index 72b16abe6..fef13a4c7 100644 --- a/examples/foundational/08-custom-frame-processor.py +++ b/examples/foundational/08-custom-frame-processor.py @@ -4,14 +4,11 @@ # SPDX-License-Identifier: BSD 2-Clause License # -import io import os -import re from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -33,6 +30,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy 
+from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -83,14 +82,12 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, video_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -137,6 +134,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/10-wake-phrase.py b/examples/foundational/10-wake-phrase.py index 30a7d6ca9..c0d4fbbd2 100644 --- a/examples/foundational/10-wake-phrase.py +++ b/examples/foundational/10-wake-phrase.py @@ -9,7 +9,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -39,19 +40,16 
@@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -98,6 +96,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/11-sound-effects.py b/examples/foundational/11-sound-effects.py index a15bae147..6802e25dd 100644 --- a/examples/foundational/11-sound-effects.py +++ b/examples/foundational/11-sound-effects.py @@ -10,7 +10,6 @@ import wave from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -23,7 +22,7 @@ from pipecat.frames.frames import ( ) from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner -from pipecat.pipeline.task import PipelineTask +from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext from pipecat.processors.aggregators.llm_response_universal import 
LLMContextAggregatorPair from pipecat.processors.frame_processor import FrameDirection, FrameProcessor @@ -36,6 +35,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -89,19 +90,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -150,6 +148,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): task = PipelineTask( pipeline, + params=PipelineParams( + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), + ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/12-describe-image-openai.py b/examples/foundational/12-describe-image-openai.py index 477803da6..24995698d 100644 --- a/examples/foundational/12-describe-image-openai.py +++ b/examples/foundational/12-describe-image-openai.py @@ -11,7 +11,6 @@ from dotenv import load_dotenv from loguru import logger from PIL import Image -from 
pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,13 +41,11 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -90,6 +89,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/12a-describe-image-anthropic.py b/examples/foundational/12a-describe-image-anthropic.py index ac4e8f01c..49541b926 100644 --- a/examples/foundational/12a-describe-image-anthropic.py +++ b/examples/foundational/12a-describe-image-anthropic.py @@ -11,7 +11,6 @@ from dotenv import load_dotenv from loguru import logger from PIL import Image -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from 
pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,13 +41,11 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -90,6 +89,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/12b-describe-image-aws.py b/examples/foundational/12b-describe-image-aws.py index cf1ce66a0..f1fffbcb0 100644 --- a/examples/foundational/12b-describe-image-aws.py +++ b/examples/foundational/12b-describe-image-aws.py @@ -11,7 +11,6 @@ from dotenv import load_dotenv from loguru import logger from PIL import Image -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from 
pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,13 +41,11 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -97,6 +96,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/12c-describe-image-gemini-flash.py b/examples/foundational/12c-describe-image-gemini-flash.py index bfd7f5146..50438bd92 100644 --- a/examples/foundational/12c-describe-image-gemini-flash.py +++ b/examples/foundational/12c-describe-image-gemini-flash.py @@ -11,7 +11,6 @@ from dotenv import load_dotenv from loguru import logger from PIL import Image -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from 
pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.google.llm import GoogleLLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,13 +41,11 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -90,6 +89,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/12d-describe-image-moondream.py b/examples/foundational/12d-describe-image-moondream.py index ee6f328f1..9b8f195bd 100644 --- a/examples/foundational/12d-describe-image-moondream.py +++ b/examples/foundational/12d-describe-image-moondream.py @@ -11,7 +11,6 @@ from dotenv import load_dotenv from loguru import logger from PIL import Image -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -25,6 +24,8 @@ from 
pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.moondream.vision import MoondreamService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -36,12 +37,10 @@ transport_params = { "daily": lambda: DailyParams( audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -69,6 +68,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14-function-calling.py b/examples/foundational/14-function-calling.py index 3f30e3389..51c5780e2 100644 --- a/examples/foundational/14-function-calling.py +++ b/examples/foundational/14-function-calling.py @@ -11,7 +11,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -30,6 +29,8 @@ from pipecat.services.openai.llm import 
OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -50,19 +51,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -144,6 +142,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14a-function-calling-anthropic.py b/examples/foundational/14a-function-calling-anthropic.py index 5f9a0ec06..ed4671455 100644 --- a/examples/foundational/14a-function-calling-anthropic.py +++ b/examples/foundational/14a-function-calling-anthropic.py @@ -12,7 +12,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from 
pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -31,6 +30,8 @@ from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -52,19 +53,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -139,6 +137,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14c-function-calling-together.py b/examples/foundational/14c-function-calling-together.py index d46b2afdb..3264b10d9 100644 --- a/examples/foundational/14c-function-calling-together.py +++ b/examples/foundational/14c-function-calling-together.py @@ -12,7 
+12,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -31,6 +30,8 @@ from pipecat.services.together.llm import TogetherLLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -47,19 +48,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -130,6 +128,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git 
a/examples/foundational/14d-function-calling-anthropic-video.py b/examples/foundational/14d-function-calling-anthropic-video.py index 9f8dbcb76..b1851490b 100644 --- a/examples/foundational/14d-function-calling-anthropic-video.py +++ b/examples/foundational/14d-function-calling-anthropic-video.py @@ -11,7 +11,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -34,6 +33,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -73,14 +74,12 @@ transport_params = { audio_out_enabled=True, video_in_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, video_in_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -143,6 +142,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), 
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14d-function-calling-aws-video.py b/examples/foundational/14d-function-calling-aws-video.py index f807e5bff..adac8893b 100644 --- a/examples/foundational/14d-function-calling-aws-video.py +++ b/examples/foundational/14d-function-calling-aws-video.py @@ -11,7 +11,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -34,6 +33,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -73,14 +74,12 @@ transport_params = { audio_out_enabled=True, video_in_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, video_in_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -150,6 +149,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + 
bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14d-function-calling-gemini-flash-video.py b/examples/foundational/14d-function-calling-gemini-flash-video.py index 5af3bc6b0..c9784bb5a 100644 --- a/examples/foundational/14d-function-calling-gemini-flash-video.py +++ b/examples/foundational/14d-function-calling-gemini-flash-video.py @@ -11,7 +11,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -34,6 +33,8 @@ from pipecat.services.google.llm import GoogleLLMService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -73,14 +74,12 @@ transport_params = { audio_out_enabled=True, video_in_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, video_in_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -143,6 +142,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, 
enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14d-function-calling-moondream-video.py b/examples/foundational/14d-function-calling-moondream-video.py index 9544818b9..36e6e585c 100644 --- a/examples/foundational/14d-function-calling-moondream-video.py +++ b/examples/foundational/14d-function-calling-moondream-video.py @@ -11,7 +11,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -26,7 +25,7 @@ from pipecat.frames.frames import ( from pipecat.pipeline.parallel_pipeline import ParallelPipeline from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner -from pipecat.pipeline.task import PipelineTask +from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair from pipecat.processors.frame_processor import FrameDirection, FrameProcessor @@ -43,6 +42,8 @@ from pipecat.services.moondream.vision import MoondreamService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies 
load_dotenv(override=True) @@ -103,14 +104,12 @@ transport_params = { audio_out_enabled=True, video_in_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, video_in_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -181,6 +180,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): task = PipelineTask( pipeline, + params=PipelineParams( + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), + ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14d-function-calling-openai-video.py b/examples/foundational/14d-function-calling-openai-video.py index f0d36bca4..67686accb 100644 --- a/examples/foundational/14d-function-calling-openai-video.py +++ b/examples/foundational/14d-function-calling-openai-video.py @@ -12,7 +12,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -35,6 +34,8 @@ from pipecat.services.llm_service import FunctionCallParams from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from 
pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -74,14 +75,12 @@ transport_params = { audio_out_enabled=True, video_in_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, video_in_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -143,6 +142,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14e-function-calling-google.py b/examples/foundational/14e-function-calling-google.py index 6019fc8b8..3e4188d38 100644 --- a/examples/foundational/14e-function-calling-google.py +++ b/examples/foundational/14e-function-calling-google.py @@ -13,7 +13,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -35,6 +34,8 @@ from pipecat.services.google.llm import GoogleLLMService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import 
TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -82,14 +83,12 @@ transport_params = { audio_out_enabled=True, video_in_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, video_in_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -196,6 +195,9 @@ indicate you should use the get_image tool are: params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14f-function-calling-groq.py b/examples/foundational/14f-function-calling-groq.py index 9a5ab27c0..9986ba2c8 100644 --- a/examples/foundational/14f-function-calling-groq.py +++ b/examples/foundational/14f-function-calling-groq.py @@ -12,7 +12,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -31,6 +30,8 @@ from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import 
TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -47,19 +48,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -127,6 +125,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14g-function-calling-grok.py b/examples/foundational/14g-function-calling-grok.py index ffd5ad947..d2ec386e9 100644 --- a/examples/foundational/14g-function-calling-grok.py +++ b/examples/foundational/14g-function-calling-grok.py @@ -12,7 +12,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -31,6 +30,8 @@ from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import 
BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -47,19 +48,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -123,6 +121,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14h-function-calling-azure.py b/examples/foundational/14h-function-calling-azure.py index 71c3286e8..295021f2d 100644 --- a/examples/foundational/14h-function-calling-azure.py +++ b/examples/foundational/14h-function-calling-azure.py @@ -12,7 +12,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 
from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -31,6 +30,8 @@ from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -47,19 +48,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -131,6 +129,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14i-function-calling-fireworks.py b/examples/foundational/14i-function-calling-fireworks.py index 87adfec41..84db014ab 100644 --- a/examples/foundational/14i-function-calling-fireworks.py +++ b/examples/foundational/14i-function-calling-fireworks.py @@ -12,11 +12,10 @@ from loguru import logger from pipecat.adapters.schemas.function_schema 
import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams -from pipecat.frames.frames import LLMRunFrame, TTSSpeakFrame +from pipecat.frames.frames import LLMRunFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask @@ -31,6 +30,8 @@ from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -47,19 +48,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -134,6 +132,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + 
bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14j-function-calling-nvidia.py b/examples/foundational/14j-function-calling-nvidia.py index d18827726..4053aa247 100644 --- a/examples/foundational/14j-function-calling-nvidia.py +++ b/examples/foundational/14j-function-calling-nvidia.py @@ -12,7 +12,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -31,6 +30,8 @@ from pipecat.services.nvidia.llm import NvidiaLLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -47,19 +48,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - 
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -136,6 +134,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14k-function-calling-cerebras.py b/examples/foundational/14k-function-calling-cerebras.py index 93dbee8b1..ed067a59f 100644 --- a/examples/foundational/14k-function-calling-cerebras.py +++ b/examples/foundational/14k-function-calling-cerebras.py @@ -12,7 +12,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -31,6 +30,8 @@ from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -47,19 +48,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, 
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -137,6 +135,9 @@ Start by asking me for my location. Then, use 'get_weather_current' to give me a params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14l-function-calling-deepseek.py b/examples/foundational/14l-function-calling-deepseek.py index e7de42a43..d1485a4ee 100644 --- a/examples/foundational/14l-function-calling-deepseek.py +++ b/examples/foundational/14l-function-calling-deepseek.py @@ -12,7 +12,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -31,6 +30,8 @@ from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -47,19 +48,16 @@ transport_params = { audio_in_enabled=True, 
audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -137,6 +135,9 @@ Start by asking me for my location. Then, use 'get_weather_current' to give me a params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14m-function-calling-openrouter.py b/examples/foundational/14m-function-calling-openrouter.py index ea16d503a..7c8b067b7 100644 --- a/examples/foundational/14m-function-calling-openrouter.py +++ b/examples/foundational/14m-function-calling-openrouter.py @@ -12,7 +12,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -31,6 +30,8 @@ from pipecat.services.openrouter.llm import OpenRouterLLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams 
+from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -47,19 +48,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -131,6 +129,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14n-function-calling-perplexity.py b/examples/foundational/14n-function-calling-perplexity.py index 2dac6250e..6fd3c7f85 100644 --- a/examples/foundational/14n-function-calling-perplexity.py +++ b/examples/foundational/14n-function-calling-perplexity.py @@ -16,7 +16,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -34,6 +33,8 @@ from pipecat.services.perplexity.llm import PerplexityLLMService from pipecat.transports.base_transport 
import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -45,19 +46,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -101,6 +99,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14o-function-calling-gemini-openai-format.py b/examples/foundational/14o-function-calling-gemini-openai-format.py index 7d0cec609..1835681bc 100644 --- a/examples/foundational/14o-function-calling-gemini-openai-format.py +++ b/examples/foundational/14o-function-calling-gemini-openai-format.py @@ -12,7 +12,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from 
pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -30,6 +29,8 @@ from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -46,19 +47,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -126,6 +124,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14p-function-calling-gemini-vertex-ai.py b/examples/foundational/14p-function-calling-gemini-vertex-ai.py index cba5eee60..712a87785 100644 --- a/examples/foundational/14p-function-calling-gemini-vertex-ai.py +++ 
b/examples/foundational/14p-function-calling-gemini-vertex-ai.py @@ -12,7 +12,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -31,6 +30,8 @@ from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -47,19 +48,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -132,6 +130,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), 
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14q-function-calling-qwen.py b/examples/foundational/14q-function-calling-qwen.py index f49c0631c..365e37b40 100644 --- a/examples/foundational/14q-function-calling-qwen.py +++ b/examples/foundational/14q-function-calling-qwen.py @@ -12,7 +12,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -31,6 +30,8 @@ from pipecat.services.qwen.llm import QwenLLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -47,19 +48,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -129,6 +127,9 @@ async def run_bot(transport: BaseTransport, runner_args: 
RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14r-function-calling-aws.py b/examples/foundational/14r-function-calling-aws.py index 5e005086c..d03075f97 100644 --- a/examples/foundational/14r-function-calling-aws.py +++ b/examples/foundational/14r-function-calling-aws.py @@ -10,7 +10,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -29,6 +28,8 @@ from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -49,19 +50,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, 
audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -144,6 +142,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14s-function-calling-sambanova.py b/examples/foundational/14s-function-calling-sambanova.py index 3690fa9d7..d5835dac1 100644 --- a/examples/foundational/14s-function-calling-sambanova.py +++ b/examples/foundational/14s-function-calling-sambanova.py @@ -12,7 +12,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -31,6 +30,8 @@ from pipecat.services.sambanova.stt import SambaNovaSTTService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -47,19 +48,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: 
FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -133,6 +131,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14t-function-calling-direct.py b/examples/foundational/14t-function-calling-direct.py index feae09083..94da81266 100644 --- a/examples/foundational/14t-function-calling-direct.py +++ b/examples/foundational/14t-function-calling-direct.py @@ -11,7 +11,6 @@ from dotenv import load_dotenv from loguru import logger from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -30,6 +29,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -63,19 +64,16 @@ transport_params = { 
audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -130,6 +128,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14u-function-calling-ollama.py b/examples/foundational/14u-function-calling-ollama.py index f60af9f64..1db43bf23 100644 --- a/examples/foundational/14u-function-calling-ollama.py +++ b/examples/foundational/14u-function-calling-ollama.py @@ -12,7 +12,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,10 +27,11 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.ollama.llm import OLLamaLLMService -from pipecat.services.openai.llm 
import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -52,19 +52,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -146,6 +143,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14v-function-calling-openai.py b/examples/foundational/14v-function-calling-openai.py index 06c3e2abd..effaea9d5 100644 --- a/examples/foundational/14v-function-calling-openai.py +++ b/examples/foundational/14v-function-calling-openai.py @@ -11,7 +11,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from 
pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -30,6 +29,8 @@ from pipecat.services.openai.tts import OpenAITTSService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -50,19 +51,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -152,6 +150,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), ) diff --git a/examples/foundational/14w-function-calling-mistral.py b/examples/foundational/14w-function-calling-mistral.py index 82a48f6f6..fb972a25e 100644 --- a/examples/foundational/14w-function-calling-mistral.py +++ b/examples/foundational/14w-function-calling-mistral.py @@ -11,11 +11,10 @@ from loguru import logger from 
pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams -from pipecat.frames.frames import LLMRunFrame, TTSSpeakFrame +from pipecat.frames.frames import LLMRunFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask @@ -30,6 +29,8 @@ from pipecat.services.mistral.llm import MistralLLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -50,19 +51,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -140,6 +138,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + 
turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/14x-function-calling-openpipe.py b/examples/foundational/14x-function-calling-openpipe.py index ac918a0ad..230de8911 100644 --- a/examples/foundational/14x-function-calling-openpipe.py +++ b/examples/foundational/14x-function-calling-openpipe.py @@ -12,7 +12,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -31,6 +30,8 @@ from pipecat.services.openpipe.llm import OpenPipeLLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -51,19 +52,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), 
- turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -150,6 +148,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/15-switch-voices.py b/examples/foundational/15-switch-voices.py index afa51289c..39217489d 100644 --- a/examples/foundational/15-switch-voices.py +++ b/examples/foundational/15-switch-voices.py @@ -12,7 +12,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -33,6 +32,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -95,19 +96,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - 
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -162,6 +160,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/15a-switch-languages.py b/examples/foundational/15a-switch-languages.py index 7e70b0f2d..bd651d90f 100644 --- a/examples/foundational/15a-switch-languages.py +++ b/examples/foundational/15a-switch-languages.py @@ -13,7 +13,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -34,6 +33,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -84,19 +85,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - 
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -152,6 +150,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/16-gpu-container-local-bot.py b/examples/foundational/16-gpu-container-local-bot.py index 1e40a33f5..66b8800ec 100644 --- a/examples/foundational/16-gpu-container-local-bot.py +++ b/examples/foundational/16-gpu-container-local-bot.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -32,6 +31,8 @@ from pipecat.transports.daily.transport import ( DailyParams, ) from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -43,19 +44,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, 
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -106,6 +104,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/17-detect-user-idle.py b/examples/foundational/17-detect-user-idle.py index e9671e145..b1b4fcf7c 100644 --- a/examples/foundational/17-detect-user-idle.py +++ b/examples/foundational/17-detect-user-idle.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -29,6 +28,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies 
load_dotenv(override=True) @@ -40,19 +41,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -124,6 +122,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/20a-persistent-context-openai.py b/examples/foundational/20a-persistent-context-openai.py index 1a885b1fd..1a4a83bd2 100644 --- a/examples/foundational/20a-persistent-context-openai.py +++ b/examples/foundational/20a-persistent-context-openai.py @@ -14,7 +14,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -33,6 +32,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from 
pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -163,19 +164,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -219,6 +217,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/20c-persistent-context-anthropic.py b/examples/foundational/20c-persistent-context-anthropic.py index 5584d525b..8c5709abb 100644 --- a/examples/foundational/20c-persistent-context-anthropic.py +++ b/examples/foundational/20c-persistent-context-anthropic.py @@ -14,7 +14,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from 
pipecat.audio.vad.vad_analyzer import VADParams @@ -33,6 +32,8 @@ from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -170,19 +171,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -230,6 +228,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/20d-persistent-context-gemini.py b/examples/foundational/20d-persistent-context-gemini.py index e618b7d10..bcea44845 100644 --- a/examples/foundational/20d-persistent-context-gemini.py +++ b/examples/foundational/20d-persistent-context-gemini.py @@ -14,7 +14,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from 
pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -36,6 +35,8 @@ from pipecat.services.google.llm import GoogleLLMService from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -121,7 +122,7 @@ messages = [ { "role": "system", "content": """You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your -capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that +capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way. 
@@ -233,14 +234,12 @@ transport_params = { audio_out_enabled=True, video_in_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, video_in_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -285,6 +284,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/21-tavus-transport.py b/examples/foundational/21-tavus-transport.py index b6643d668..1ff84157f 100644 --- a/examples/foundational/21-tavus-transport.py +++ b/examples/foundational/21-tavus-transport.py @@ -12,7 +12,6 @@ import aiohttp from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -26,6 +25,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.google.llm import GoogleLLMService from pipecat.transports.tavus.transport import TavusParams, TavusTransport +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -45,7 +46,6 @@ async def main(): audio_out_enabled=True, microphone_out_enabled=False, 
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), ) @@ -87,6 +87,9 @@ async def main(): audio_out_sample_rate=24000, enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), ) diff --git a/examples/foundational/21a-tavus-video-service.py b/examples/foundational/21a-tavus-video-service.py index b35b315bb..b7def7b62 100644 --- a/examples/foundational/21a-tavus-video-service.py +++ b/examples/foundational/21a-tavus-video-service.py @@ -11,7 +11,6 @@ import aiohttp from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -29,6 +28,8 @@ from pipecat.services.google.llm import GoogleLLMService from pipecat.services.tavus.video import TavusVideoService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -44,7 +45,6 @@ transport_params = { video_out_width=1280, video_out_height=720, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, @@ -54,7 +54,6 @@ transport_params = { video_out_width=1280, video_out_height=720, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -107,6 +106,9 @@ async 
def run_bot(transport: BaseTransport, runner_args: RunnerArguments): audio_out_sample_rate=24000, enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/23-bot-background-sound.py b/examples/foundational/23-bot-background-sound.py index 6a54b43e7..432a5bd7b 100644 --- a/examples/foundational/23-bot-background-sound.py +++ b/examples/foundational/23-bot-background-sound.py @@ -12,7 +12,6 @@ from dotenv import load_dotenv from loguru import logger from pipecat.audio.mixers.soundfile_mixer import SoundfileMixer -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -30,6 +29,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -50,7 +51,6 @@ transport_params = { volume=2.0, ), vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, @@ -61,7 +61,6 @@ transport_params = { volume=2.0, ), vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, @@ 
-72,7 +71,6 @@ transport_params = { volume=2.0, ), vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -114,6 +112,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/24-stt-mute-filter.py b/examples/foundational/24-stt-mute-filter.py index 7793b3dc0..af9b152f9 100644 --- a/examples/foundational/24-stt-mute-filter.py +++ b/examples/foundational/24-stt-mute-filter.py @@ -13,7 +13,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -33,6 +32,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -53,19 +54,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( 
audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -136,6 +134,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/27-simli-layer.py b/examples/foundational/27-simli-layer.py index bf2d56ca0..4ad8485e3 100644 --- a/examples/foundational/27-simli-layer.py +++ b/examples/foundational/27-simli-layer.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.services.simli.video import SimliVideoService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -43,7 +44,6 @@ transport_params = { video_out_width=512, video_out_height=512, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - 
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, @@ -53,7 +53,6 @@ transport_params = { video_out_width=512, video_out_height=512, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -103,6 +102,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/28-transcription-processor.py b/examples/foundational/28-transcription-processor.py index 8258763be..c9493e263 100644 --- a/examples/foundational/28-transcription-processor.py +++ b/examples/foundational/28-transcription-processor.py @@ -10,7 +10,6 @@ from typing import List, Optional from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -29,6 +28,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -102,19 +103,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - 
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -165,6 +163,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/29-turn-tracking-observer.py b/examples/foundational/29-turn-tracking-observer.py index 3965b2953..49622e0b7 100644 --- a/examples/foundational/29-turn-tracking-observer.py +++ b/examples/foundational/29-turn-tracking-observer.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -29,6 +28,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,19 +41,16 @@ 
transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -96,6 +94,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, observers=[UserBotLatencyLogObserver()], diff --git a/examples/foundational/30-observer.py b/examples/foundational/30-observer.py index 5a01c2934..89ecc5ae8 100644 --- a/examples/foundational/30-observer.py +++ b/examples/foundational/30-observer.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -42,6 +41,8 @@ from pipecat.transports.base_output import BaseOutputTransport from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import 
TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -87,19 +88,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -143,6 +141,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, observers=[ diff --git a/examples/foundational/32-gemini-grounding-metadata.py b/examples/foundational/32-gemini-grounding-metadata.py index f782e7c36..288aa6743 100644 --- a/examples/foundational/32-gemini-grounding-metadata.py +++ b/examples/foundational/32-gemini-grounding-metadata.py @@ -12,7 +12,6 @@ from pathlib import Path from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -32,6 +31,8 @@ from pipecat.services.llm_service import LLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from 
pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies sys.path.append(str(Path(__file__).parent.parent)) @@ -82,19 +83,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -143,6 +141,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, observers=[LLMSearchLoggerObserver()], diff --git a/examples/foundational/33-gemini-rag.py b/examples/foundational/33-gemini-rag.py index e2e88e390..9be69b151 100644 --- a/examples/foundational/33-gemini-rag.py +++ b/examples/foundational/33-gemini-rag.py @@ -57,7 +57,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero 
import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -76,6 +75,8 @@ from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -166,19 +167,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -243,6 +241,9 @@ Your response will be turned into speech so use only simple words and punctuatio params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/34-audio-recording.py b/examples/foundational/34-audio-recording.py index cd5250dfc..da6f732fe 100644 --- a/examples/foundational/34-audio-recording.py +++ b/examples/foundational/34-audio-recording.py @@ -50,7 +50,6 @@ import aiofiles from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import 
SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -69,6 +68,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -95,19 +96,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -155,6 +153,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/35-pattern-pair-voice-switching.py b/examples/foundational/35-pattern-pair-voice-switching.py index 3a102acfd..268e9479a 100644 --- a/examples/foundational/35-pattern-pair-voice-switching.py +++ 
b/examples/foundational/35-pattern-pair-voice-switching.py @@ -44,7 +44,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -62,6 +61,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies from pipecat.utils.text.pattern_pair_aggregator import ( MatchAction, PatternMatch, @@ -86,19 +87,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -220,6 +218,9 @@ Remember: Use narrator voice for EVERYTHING except the actual quoted dialogue."" params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git 
a/examples/foundational/36-user-email-gathering.py b/examples/foundational/36-user-email-gathering.py index 3ca6b01a4..d3b4ace69 100644 --- a/examples/foundational/36-user-email-gathering.py +++ b/examples/foundational/36-user-email-gathering.py @@ -12,7 +12,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -31,6 +30,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -47,19 +48,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -135,6 +133,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, 
enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/37-mem0.py b/examples/foundational/37-mem0.py index 436f0a452..da1e2b315 100644 --- a/examples/foundational/37-mem0.py +++ b/examples/foundational/37-mem0.py @@ -47,7 +47,6 @@ from typing import Union from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -67,6 +66,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -137,19 +138,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -268,6 +266,9 @@ async def run_bot(transport: 
BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, observers=[RTVIObserver(rtvi)], diff --git a/examples/foundational/38-smart-turn-fal.py b/examples/foundational/38-smart-turn-fal.py index c18c7e1c6..e0a62241c 100644 --- a/examples/foundational/38-smart-turn-fal.py +++ b/examples/foundational/38-smart-turn-fal.py @@ -28,6 +28,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,25 +42,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=FalSmartTurnAnalyzer( - api_key=os.getenv("FAL_SMART_TURN_API_KEY"), aiohttp_session=aiohttp.ClientSession() - ), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=FalSmartTurnAnalyzer( - api_key=os.getenv("FAL_SMART_TURN_API_KEY"), aiohttp_session=aiohttp.ClientSession() - ), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=FalSmartTurnAnalyzer( - api_key=os.getenv("FAL_SMART_TURN_API_KEY"), aiohttp_session=aiohttp.ClientSession() - ), ), } @@ -102,6 +95,16 @@ async def run_bot(transport: BaseTransport, runner_args: 
RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[ + TurnAnalyzerBotTurnStartStrategy( + turn_analyzer=FalSmartTurnAnalyzer( + api_key=os.getenv("FAL_SMART_TURN_API_KEY"), + aiohttp_session=aiohttp.ClientSession(), + ) + ) + ] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/38a-smart-turn-local-coreml.py b/examples/foundational/38a-smart-turn-local-coreml.py index 122cfb463..185120348 100644 --- a/examples/foundational/38a-smart-turn-local-coreml.py +++ b/examples/foundational/38a-smart-turn-local-coreml.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_coreml_smart_turn import LocalCoreMLSmartTurnAnalyzer from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -56,25 +57,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalCoreMLSmartTurnAnalyzer( - smart_turn_model_path=smart_turn_model_path, params=SmartTurnParams() - ), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalCoreMLSmartTurnAnalyzer( - 
smart_turn_model_path=smart_turn_model_path, params=SmartTurnParams() - ), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalCoreMLSmartTurnAnalyzer( - smart_turn_model_path=smart_turn_model_path, params=SmartTurnParams() - ), ), } @@ -118,6 +110,15 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[ + TurnAnalyzerBotTurnStartStrategy( + turn_analyzer=LocalCoreMLSmartTurnAnalyzer( + smart_turn_model_path=smart_turn_model_path + ) + ) + ] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/38b-smart-turn-local.py b/examples/foundational/38b-smart-turn-local.py index 0f77d73b9..b5008f927 100644 --- a/examples/foundational/38b-smart-turn-local.py +++ b/examples/foundational/38b-smart-turn-local.py @@ -10,7 +10,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -20,7 +19,7 @@ from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair -from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIObserver, RTVIProcessor +from pipecat.processors.frameworks.rtvi import RTVIObserver, RTVIProcessor from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.cartesia.tts import CartesiaTTSService @@ 
-29,6 +28,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,19 +41,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -99,6 +97,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), observers=[RTVIObserver(rtvi)], idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, diff --git a/examples/foundational/39-mcp-stdio.py b/examples/foundational/39-mcp-stdio.py index 8fe64f73b..0b9e32dd2 100644 --- a/examples/foundational/39-mcp-stdio.py +++ b/examples/foundational/39-mcp-stdio.py @@ -17,7 +17,6 @@ from loguru import logger from mcp import StdioServerParameters from PIL import Image -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import 
LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -41,6 +40,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.mcp_service import MCPClient from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -119,7 +120,6 @@ transport_params = { video_out_width=1024, video_out_height=1024, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, @@ -128,7 +128,6 @@ transport_params = { video_out_width=1024, video_out_height=1024, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -210,6 +209,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/39a-mcp-streamable-http.py b/examples/foundational/39a-mcp-streamable-http.py index 4a94c328f..5f61e3bb6 100644 --- a/examples/foundational/39a-mcp-streamable-http.py +++ b/examples/foundational/39a-mcp-streamable-http.py @@ -11,7 +11,6 @@ from dotenv import load_dotenv from loguru import logger from mcp.client.session_group import StreamableHttpParameters -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import 
LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -30,6 +29,8 @@ from pipecat.services.mcp_service import MCPClient from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -41,19 +42,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -123,6 +121,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/39b-mcp-streamable-http-gemini-live.py b/examples/foundational/39b-mcp-streamable-http-gemini-live.py index 101559266..31b8dbd94 100644 --- a/examples/foundational/39b-mcp-streamable-http-gemini-live.py +++ b/examples/foundational/39b-mcp-streamable-http-gemini-live.py @@ -11,7 +11,6 @@ from dotenv import load_dotenv from 
loguru import logger from mcp.client.session_group import StreamableHttpParameters -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -19,7 +18,7 @@ from pipecat.frames.frames import LLMRunFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask -from pipecat.processors.aggregators.llm_context import NOT_GIVEN, LLMContext +from pipecat.processors.aggregators.llm_context import LLMContext from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport @@ -30,6 +29,8 @@ from pipecat.services.mcp_service import MCPClient from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -41,19 +42,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - 
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -125,6 +123,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/39c-multiple-mcp.py b/examples/foundational/39c-multiple-mcp.py index e69f79f13..af635ef1b 100644 --- a/examples/foundational/39c-multiple-mcp.py +++ b/examples/foundational/39c-multiple-mcp.py @@ -20,7 +20,6 @@ from mcp.client.session_group import StreamableHttpParameters from PIL import Image from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -44,6 +43,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.mcp_service import MCPClient from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -103,7 +104,6 @@ transport_params = { video_out_width=1024, video_out_height=1024, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, @@ -112,7 +112,6 @@ transport_params = { video_out_width=1024, video_out_height=1024, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - 
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -212,6 +211,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/42-interruption-config.py b/examples/foundational/42-interruption-config.py index 9c312be03..000c06cc1 100644 --- a/examples/foundational/42-interruption-config.py +++ b/examples/foundational/42-interruption-config.py @@ -9,8 +9,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.interruptions.min_words_interruption_strategy import MinWordsInterruptionStrategy -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -29,6 +27,9 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies +from pipecat.turns.user.min_words_user_turn_start_strategy import MinWordsUserTurnStartStrategy load_dotenv(override=True) @@ -40,19 +41,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( 
audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -99,7 +97,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, - interruption_strategies=[MinWordsInterruptionStrategy(min_words=3)], + turn_start_strategies=TurnStartStrategies( + user=[MinWordsUserTurnStartStrategy(min_words=3)], + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())], + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/43-heygen-transport.py b/examples/foundational/43-heygen-transport.py index 859206cce..e0e9032b8 100644 --- a/examples/foundational/43-heygen-transport.py +++ b/examples/foundational/43-heygen-transport.py @@ -12,7 +12,6 @@ import aiohttp from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.google.llm import GoogleLLMService from pipecat.transports.heygen.transport import HeyGenParams, HeyGenTransport +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -44,7 +45,6 @@ async def main(): 
audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), ) @@ -84,6 +84,9 @@ async def main(): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), ) diff --git a/examples/foundational/43a-heygen-video-service.py b/examples/foundational/43a-heygen-video-service.py index 1add6aeea..8b5c2bb59 100644 --- a/examples/foundational/43a-heygen-video-service.py +++ b/examples/foundational/43a-heygen-video-service.py @@ -10,7 +10,6 @@ import aiohttp from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -29,6 +28,8 @@ from pipecat.services.heygen.client import ServiceType from pipecat.services.heygen.video import HeyGenVideoService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams, DailyTransport +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -45,7 +46,6 @@ transport_params = { video_out_height=720, video_out_bitrate=2_000_000, # 2MBps vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, @@ -55,7 +55,6 @@ transport_params = { video_out_width=1280, video_out_height=720, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - 
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -106,6 +105,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/44-voicemail-detection.py b/examples/foundational/44-voicemail-detection.py index 3d12151da..1c8c164db 100644 --- a/examples/foundational/44-voicemail-detection.py +++ b/examples/foundational/44-voicemail-detection.py @@ -9,7 +9,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -39,19 +40,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: 
TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -100,6 +98,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/45-before-and-after-events.py b/examples/foundational/45-before-and-after-events.py index 1cffd533c..916065c26 100644 --- a/examples/foundational/45-before-and-after-events.py +++ b/examples/foundational/45-before-and-after-events.py @@ -10,7 +10,6 @@ from dataclasses import dataclass from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -50,19 +51,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, 
audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -106,6 +104,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/47-sentry-metrics.py b/examples/foundational/47-sentry-metrics.py index 2f7369349..eb63060c7 100644 --- a/examples/foundational/47-sentry-metrics.py +++ b/examples/foundational/47-sentry-metrics.py @@ -10,7 +10,6 @@ import sentry_sdk from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -29,6 +28,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -40,19 +41,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - 
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -109,6 +107,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/48-service-switcher.py b/examples/foundational/48-service-switcher.py index cb2757611..332a9bf3c 100644 --- a/examples/foundational/48-service-switcher.py +++ b/examples/foundational/48-service-switcher.py @@ -12,7 +12,6 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -36,6 +35,8 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import 
TurnStartStrategies load_dotenv(override=True) @@ -64,19 +65,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -154,6 +152,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/foundational/49a-thinking-anthropic.py b/examples/foundational/49a-thinking-anthropic.py index 4066a15c0..3c49f2962 100644 --- a/examples/foundational/49a-thinking-anthropic.py +++ b/examples/foundational/49a-thinking-anthropic.py @@ -9,7 +9,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -28,6 +27,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from 
pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies load_dotenv(override=True) @@ -39,19 +40,16 @@ transport_params = { audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()), ), } @@ -104,6 +102,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, ) diff --git a/examples/quickstart/bot.py b/examples/quickstart/bot.py index f353ecf6f..23cd752ef 100644 --- a/examples/quickstart/bot.py +++ b/examples/quickstart/bot.py @@ -23,6 +23,8 @@ import os from dotenv import load_dotenv from loguru import logger +from pipecat.turns.bot.turn_analyzer_bot_turn_start_strategy import TurnAnalyzerBotTurnStartStrategy +from pipecat.turns.turn_start_strategies import TurnStartStrategies print("🚀 Starting Pipecat bot...") print("⏳ Loading models and imports (20 seconds, first run only)\n") @@ -101,6 +103,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, + turn_start_strategies=TurnStartStrategies( + 
bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), ), observers=[RTVIObserver(rtvi)], ) @@ -130,13 +135,11 @@ async def bot(runner_args: RunnerArguments): audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), - turn_analyzer=LocalSmartTurnAnalyzerV3(), ), } From 9e56d1ac650ebff62732909fbf1ef8e4813c1270 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 18 Dec 2025 22:22:13 -0800 Subject: [PATCH 24/30] TurnStartStrategies: set user and bot strategies defaults if None --- src/pipecat/pipeline/task.py | 12 +--------- src/pipecat/turns/turn_start_strategies.py | 26 +++++++++++++++++++--- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/pipecat/pipeline/task.py b/src/pipecat/pipeline/task.py index 56cf7d2a1..17bb4f91e 100644 --- a/src/pipecat/pipeline/task.py +++ b/src/pipecat/pipeline/task.py @@ -47,14 +47,7 @@ from pipecat.pipeline.base_task import BasePipelineTask, PipelineTaskParams from pipecat.pipeline.pipeline import Pipeline, PipelineSink, PipelineSource from pipecat.pipeline.task_observer import TaskObserver from pipecat.processors.frame_processor import FrameDirection, FrameProcessor, FrameProcessorSetup -from pipecat.turns.bot.transcription_bot_turn_start_strategy import ( - TranscriptionBotTurnStartStrategy, -) from pipecat.turns.turn_start_strategies import TurnStartStrategies -from pipecat.turns.user.transcription_user_turn_start_strategy import ( - TranscriptionUserTurnStartStrategy, -) -from pipecat.turns.user.vad_user_turn_start_strategy import VADUserTurnStartStrategy from pipecat.utils.asyncio.task_manager import BaseTaskManager, TaskManager, TaskManagerParams from pipecat.utils.tracing.setup import 
is_tracing_available from pipecat.utils.tracing.turn_trace_observer import TurnTraceObserver @@ -295,10 +288,7 @@ class PipelineTask(BasePipelineTask): # Initialize default user and bot turn start strategies. if not self._params.turn_start_strategies: - self._params.turn_start_strategies = TurnStartStrategies( - user=[VADUserTurnStartStrategy(), TranscriptionUserTurnStartStrategy()], - bot=[TranscriptionBotTurnStartStrategy()], - ) + self._params.turn_start_strategies = TurnStartStrategies() self._finished = False self._cancelled = False diff --git a/src/pipecat/turns/turn_start_strategies.py b/src/pipecat/turns/turn_start_strategies.py index a3a344ef3..5ab4878fd 100644 --- a/src/pipecat/turns/turn_start_strategies.py +++ b/src/pipecat/turns/turn_start_strategies.py @@ -7,10 +7,17 @@ """Turn start strategy configuration.""" from dataclasses import dataclass -from typing import List +from typing import List, Optional from pipecat.turns.bot.base_bot_turn_start_strategy import BaseBotTurnStartStrategy +from pipecat.turns.bot.transcription_bot_turn_start_strategy import ( + TranscriptionBotTurnStartStrategy, +) from pipecat.turns.user.base_user_turn_start_strategy import BaseUserTurnStartStrategy +from pipecat.turns.user.transcription_user_turn_start_strategy import ( + TranscriptionUserTurnStartStrategy, +) +from pipecat.turns.user.vad_user_turn_start_strategy import VADUserTurnStartStrategy @dataclass @@ -20,12 +27,25 @@ class TurnStartStrategies: This class groups the configured turn start strategies for both the user and the bot. + If no strategies are specified for the user or the bot, the following + defaults are used: + + user: [VADUserTurnStartStrategy, TranscriptionUserTurnStartStrategy] + bot: [TranscriptionBotTurnStartStrategy] + Attributes: user: A list of user turn start strategies used to detect when the user starts speaking. bot: A list of bot turn start strategies used to decide when the bot should start speaking. 
+ """ - user: List[BaseUserTurnStartStrategy] - bot: List[BaseBotTurnStartStrategy] + user: Optional[List[BaseUserTurnStartStrategy]] = None + bot: Optional[List[BaseBotTurnStartStrategy]] = None + + def __post_init__(self): + if not self.user: + self.user = [VADUserTurnStartStrategy(), TranscriptionUserTurnStartStrategy()] + if not self.bot: + self.bot = [TranscriptionBotTurnStartStrategy()] From c9048d3a0f9a44829fa42799fcd584c4d9a72d58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Fri, 19 Dec 2025 09:13:39 -0800 Subject: [PATCH 25/30] LLMUserAggregator: prevent consecutive user/bot turn starts --- .../aggregators/llm_response_universal.py | 53 ++++++++++++------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index 9c687492c..2e4dfbe65 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -241,6 +241,7 @@ class LLMUserAggregator(LLMContextAggregator): """ super().__init__(context=context, role="user", **kwargs) self._params = params or LLMUserAggregatorParams() + self._user_speaking = False async def cleanup(self): """Clean up processor resources.""" @@ -251,9 +252,11 @@ class LLMUserAggregator(LLMContextAggregator): """Reset the aggregation state and interruption strategies.""" await super().reset() - if self.turn_start_strategies: + if self.turn_start_strategies and self.turn_start_strategies.user: for s in self.turn_start_strategies.user: await s.reset() + + if self.turn_start_strategies and self.turn_start_strategies.bot: for s in self.turn_start_strategies.bot: await s.reset() @@ -313,20 +316,19 @@ class LLMUserAggregator(LLMContextAggregator): await self.push_context_frame() async def _start(self, frame: StartFrame): - if not self.turn_start_strategies: - return + if self.turn_start_strategies and 
self.turn_start_strategies.user: + for s in self.turn_start_strategies.user: + await s.setup(self.task_manager) + s.add_event_handler("on_push_frame", self._on_push_frame) + s.add_event_handler("on_broadcast_frame", self._on_broadcast_frame) + s.add_event_handler("on_user_turn_started", self._on_user_turn_started) - for s in self.turn_start_strategies.user: - await s.setup(self.task_manager) - s.add_event_handler("on_push_frame", self._on_push_frame) - s.add_event_handler("on_broadcast_frame", self._on_broadcast_frame) - s.add_event_handler("on_user_turn_started", self._on_user_turn_started) - - for s in self.turn_start_strategies.bot: - await s.setup(self.task_manager) - s.add_event_handler("on_push_frame", self._on_push_frame) - s.add_event_handler("on_broadcast_frame", self._on_broadcast_frame) - s.add_event_handler("on_bot_turn_started", self._on_bot_turn_started) + if self.turn_start_strategies and self.turn_start_strategies.bot: + for s in self.turn_start_strategies.bot: + await s.setup(self.task_manager) + s.add_event_handler("on_push_frame", self._on_push_frame) + s.add_event_handler("on_broadcast_frame", self._on_broadcast_frame) + s.add_event_handler("on_bot_turn_started", self._on_bot_turn_started) async def _stop(self, frame: EndFrame): await self._cleanup() @@ -335,17 +337,20 @@ class LLMUserAggregator(LLMContextAggregator): await self._cleanup() async def _cleanup(self): - if self.turn_start_strategies: + if self.turn_start_strategies and self.turn_start_strategies.user: for s in self.turn_start_strategies.user: await s.cleanup() + + if self.turn_start_strategies and self.turn_start_strategies.bot: for s in self.turn_start_strategies.bot: await s.cleanup() async def _turn_start_strategies_process_frame(self, frame: Frame): - if self.turn_start_strategies: + if self.turn_start_strategies and self.turn_start_strategies.user: for strategy in self.turn_start_strategies.user: await strategy.process_frame(frame) + if self.turn_start_strategies and 
self.turn_start_strategies.bot: for strategy in self.turn_start_strategies.bot: await strategy.process_frame(frame) @@ -399,8 +404,14 @@ class LLMUserAggregator(LLMContextAggregator): await self.broadcast_frame(frame_cls, **kwargs) async def _trigger_user_turn_start(self, strategy: BaseUserTurnStartStrategy): + # Prevent two consecutive user turn starts. + if self._user_speaking: + return + + self._user_speaking = True + # Reset all user turn start strategies to start fresh. - if self.turn_start_strategies: + if self.turn_start_strategies and self.turn_start_strategies.user: for s in self.turn_start_strategies.user: await s.reset() @@ -411,8 +422,14 @@ class LLMUserAggregator(LLMContextAggregator): await self.broadcast_frame(InterruptionFrame) async def _trigger_bot_turn_start(self, strategy: BaseBotTurnStartStrategy): + # Prevent two consecutive bot turn starts. + if not self._user_speaking: + return + + self._user_speaking = False + # Reset all bot turn start strategies to start fresh. 
- if self.turn_start_strategies: + if self.turn_start_strategies and self.turn_start_strategies.bot: for s in self.turn_start_strategies.bot: await s.reset() From 4a32aa5266a6b7dd37ca3409ea0284eba7b11781 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Fri, 19 Dec 2025 09:14:29 -0800 Subject: [PATCH 26/30] TurnAnalyzerBotTurnStartStrategy: don't use text on interim transcriptions --- .../turns/bot/turn_analyzer_bot_turn_start_strategy.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/pipecat/turns/bot/turn_analyzer_bot_turn_start_strategy.py b/src/pipecat/turns/bot/turn_analyzer_bot_turn_start_strategy.py index 5d2aaf020..b96b347a4 100644 --- a/src/pipecat/turns/bot/turn_analyzer_bot_turn_start_strategy.py +++ b/src/pipecat/turns/bot/turn_analyzer_bot_turn_start_strategy.py @@ -89,8 +89,10 @@ class TurnAnalyzerBotTurnStartStrategy(BaseBotTurnStartStrategy): await self._handle_vad_user_stopped_speaking(frame) elif isinstance(frame, InputAudioRawFrame): await self._handle_input_audio(frame) - elif isinstance(frame, (TranscriptionFrame, InterimTranscriptionFrame)): + elif isinstance(frame, TranscriptionFrame): await self._handle_transcription(frame) + elif isinstance(frame, InterimTranscriptionFrame): + await self._handle_interim_transcription(frame) async def _start(self, frame: StartFrame): """Process the start frame to configure the turn analyzer.""" @@ -116,12 +118,16 @@ class TurnAnalyzerBotTurnStartStrategy(BaseBotTurnStartStrategy): await self._handle_prediction_result(prediction) await self._handle_end_of_turn(state) - async def _handle_transcription(self, frame: TranscriptionFrame | InterimTranscriptionFrame): + async def _handle_transcription(self, frame: TranscriptionFrame): """Handle user transcription.""" # We don't really care about the content. 
self._text = frame.text self._event.set() + async def _handle_interim_transcription(self, frame: InterimTranscriptionFrame): + """Handle user interim transcription.""" + self._event.set() + async def _handle_end_of_turn(self, state: EndOfTurnState): """Handle completion of end-of-turn analysis.""" if state == EndOfTurnState.COMPLETE: From 9bf6668b52646b768e1de8d0860083f84f9fa708 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Fri, 19 Dec 2025 12:02:53 -0800 Subject: [PATCH 27/30] LLMUserAggregator: show error if using turn analyzer in transport --- .../aggregators/llm_response_universal.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index 2e4dfbe65..cad371ef3 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -44,6 +44,7 @@ from pipecat.frames.frames import ( LLMThoughtEndFrame, LLMThoughtStartFrame, LLMThoughtTextFrame, + SpeechControlParamsFrame, StartFrame, TextFrame, TranscriptionFrame, @@ -300,6 +301,8 @@ class LLMUserAggregator(LLMContextAggregator): await self.push_frame(frame, direction) elif isinstance(frame, LLMSetToolChoiceFrame): self.set_tool_choice(frame.tool_choice) + elif isinstance(frame, SpeechControlParamsFrame): + await self._handle_speech_control_params(frame) else: await self.push_frame(frame, direction) @@ -367,6 +370,16 @@ class LLMUserAggregator(LLMContextAggregator): if frame.run_llm: await self.push_context_frame() + async def _handle_speech_control_params(self, frame: SpeechControlParamsFrame): + if not frame.turn_params: + return + + logger.error( + f"{self}: turn_analyzer in base input transport is deprecated and " + "might result in unexpected behavior. Use PipelineTask's turn_start_strategies with " + "TurnAnalyzerBotTurnStartStrategy instead." 
+ ) + async def _handle_transcription(self, frame: TranscriptionFrame): text = frame.text From 5816f960cc66aeaa33e9d165314d9646376d3b10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Fri, 19 Dec 2025 13:33:06 -0800 Subject: [PATCH 28/30] LLMUserAggregator: add on_user_turn_started/on_bot_turn_started events --- .../aggregators/llm_response_universal.py | 42 ++++++++++++++----- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index cad371ef3..4e918f440 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -211,19 +211,32 @@ class LLMContextAggregator(FrameProcessor): class LLMUserAggregator(LLMContextAggregator): - """User LLM aggregator that processes speech-to-text transcriptions. + """User LLM aggregator that aggregates user input during active user turns. - This aggregator handles the complex logic of aggregating user speech transcriptions - from STT services. It manages multiple scenarios including: + This aggregator operates within turn boundaries defined by the configured + user and bot turn start strategies. User turn start strategies indicate when + a user turn begins, while bot turn start strategies signal when the user + turn has ended and control transitions to the bot turn. - - Transcriptions received between VAD events - - Transcriptions received outside VAD events - - Interim vs final transcriptions - - User interruptions during bot speech - - Emulated VAD for whispered or short utterances + The aggregator collects and aggregates speech-to-text transcriptions that + occur while a user turn is active and pushes the final aggregation when the + user turn is finished. 
+ + Event handlers available: + + - on_user_turn_started: Called when the user turn starts + - on_bot_turn_started: Called when the user turn ends and it is now the bot’s turn + + Example:: + + @aggregator.event_handler("on_user_turn_started") + async def on_user_turn_started(aggregator, strategy): + ... + + @aggregator.event_handler("on_bot_turn_started") + async def on_bot_turn_started(aggregator, strategy): + ... - The aggregator uses timeouts to handle cases where transcriptions arrive - after VAD events or when no VAD is available. """ def __init__( @@ -238,12 +251,15 @@ class LLMUserAggregator(LLMContextAggregator): Args: context: The LLM context for conversation storage. params: Configuration parameters for aggregation behavior. - **kwargs: Additional arguments. Supports deprecated 'aggregation_timeout'. + **kwargs: Additional arguments. """ super().__init__(context=context, role="user", **kwargs) self._params = params or LLMUserAggregatorParams() self._user_speaking = False + self._register_event_handler("on_user_turn_started") + self._register_event_handler("on_bot_turn_started") + async def cleanup(self): """Clean up processor resources.""" await super().cleanup() @@ -434,6 +450,8 @@ class LLMUserAggregator(LLMContextAggregator): await self.broadcast_frame(UserStartedSpeakingFrame) await self.broadcast_frame(InterruptionFrame) + await self._call_event_handler("on_user_turn_started", strategy) + async def _trigger_bot_turn_start(self, strategy: BaseBotTurnStartStrategy): # Prevent two consecutive bot turn starts. if not self._user_speaking: @@ -451,6 +469,8 @@ class LLMUserAggregator(LLMContextAggregator): # TODO(aleix): This frame should really come from the top of the pipeline. await self.broadcast_frame(UserStoppedSpeakingFrame) + await self._call_event_handler("on_bot_turn_started", strategy) + # Always push context frame. 
await self.push_aggregation() From bf791527dccc4375bdd6adb2073902623ec4f2bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Wed, 12 Nov 2025 15:35:46 -0800 Subject: [PATCH 29/30] update CHANGELOG for new user/bot turn start strategies --- changelog/3045.added.md | 37 ++++++++++++++++++++++++++++++++++ changelog/3045.deprecated.2.md | 1 + changelog/3045.deprecated.3.md | 1 + changelog/3045.deprecated.4.md | 1 + changelog/3045.deprecated.5.md | 1 + changelog/3045.deprecated.6.md | 1 + changelog/3045.deprecated.md | 1 + 7 files changed, 43 insertions(+) create mode 100644 changelog/3045.added.md create mode 100644 changelog/3045.deprecated.2.md create mode 100644 changelog/3045.deprecated.3.md create mode 100644 changelog/3045.deprecated.4.md create mode 100644 changelog/3045.deprecated.5.md create mode 100644 changelog/3045.deprecated.6.md create mode 100644 changelog/3045.deprecated.md diff --git a/changelog/3045.added.md b/changelog/3045.added.md new file mode 100644 index 000000000..e29251335 --- /dev/null +++ b/changelog/3045.added.md @@ -0,0 +1,37 @@ +- Introducing user and bot turn start strategies. Turn start strategies indicate when user and bot turns begin. In conversational agents, these are often referred to as start/stop speaking or turn-taking plans or policies. + + User turn start strategies indicate when the user starts speaking (e.g. using VAD events or when a user says one or more words). + + Bot turn start strategies indicate when the bot should start speaking (e.g. using an end-of-turn detection model or by observing incoming transcriptions). + + A list of strategies can be specified for both the user and the bot; strategies are evaluated in order until one evaluates to true. 
+ + Available user turn start strategies: + - VADUserTurnStartStrategy + - TranscriptionUserTurnStartStrategy + - MinWordsUserTurnStartStrategy + + Available bot turn start strategies: + - TranscriptionBotTurnStartStrategy + - TurnAnalyzerBotTurnStartStrategy + + The default strategies are: + + - user: [VADUserTurnStartStrategy, TranscriptionUserTurnStartStrategy] + - bot: [TranscriptionBotTurnStartStrategy] + + Turn start strategies are configured when setting up a `PipelineTask`. For example: + + ```python + task = PipelineTask(..., params=PipelineParams( + turn_start_strategies=TurnStartStrategies( + bot=[ + TurnAnalyzerBotTurnStartStrategy( + turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()) + ) + ], + ), + )) + ``` + + In order to use the turn start strategies you should update to the new universal `LLMContext` and `LLMContextAggregatorPair`. diff --git a/changelog/3045.deprecated.2.md b/changelog/3045.deprecated.2.md new file mode 100644 index 000000000..0947669d0 --- /dev/null +++ b/changelog/3045.deprecated.2.md @@ -0,0 +1 @@ +- ⚠️ `TransportParams.turn_analyzer` is deprecated and might result in unexpected behavior. Use `PipelineTask`'s new `turn_start_strategies` parameter instead. diff --git a/changelog/3045.deprecated.3.md b/changelog/3045.deprecated.3.md new file mode 100644 index 000000000..33c464ee5 --- /dev/null +++ b/changelog/3045.deprecated.3.md @@ -0,0 +1 @@ +- `FrameProcessor.interruption_strategies` is deprecated. Use `PipelineTask`'s new `turn_start_strategies` parameter instead. diff --git a/changelog/3045.deprecated.4.md b/changelog/3045.deprecated.4.md new file mode 100644 index 000000000..fda634ce8 --- /dev/null +++ b/changelog/3045.deprecated.4.md @@ -0,0 +1 @@ +- `EmulateUserStartedSpeakingFrame` and `EmulateUserStoppedSpeakingFrame` frames are deprecated.
diff --git a/changelog/3045.deprecated.5.md b/changelog/3045.deprecated.5.md new file mode 100644 index 000000000..57781a489 --- /dev/null +++ b/changelog/3045.deprecated.5.md @@ -0,0 +1 @@ +- Deprecated the `emulated` field in the `UserStartedSpeakingFrame` and `UserStoppedSpeakingFrame` frames. diff --git a/changelog/3045.deprecated.6.md b/changelog/3045.deprecated.6.md new file mode 100644 index 000000000..3bf804220 --- /dev/null +++ b/changelog/3045.deprecated.6.md @@ -0,0 +1 @@ +- The `LLMUserAggregatorParams` and `LLMAssistantAggregatorParams` classes in `pipecat.processors.aggregators.llm_response` are now deprecated. Use the new universal `LLMContext` and `LLMContextAggregatorPair` instead. diff --git a/changelog/3045.deprecated.md b/changelog/3045.deprecated.md new file mode 100644 index 000000000..9b0977f5d --- /dev/null +++ b/changelog/3045.deprecated.md @@ -0,0 +1 @@ +- `pipecat.audio.interruptions.MinWordsInterruptionStrategy` is deprecated. Use `pipecat.turns.user.MinWordsUserTurnStartStrategy` with `PipelineTask`'s new `turn_start_strategies` parameter instead. From ec7a7ed0486d8100e3181cf1b9f627f3e1929865 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Fri, 19 Dec 2025 13:16:00 -0800 Subject: [PATCH 30/30] add RNNoiseFilter to changelog and update pyrnnoise to 0.4.1 --- changelog/3205.added.md | 1 + pyproject.toml | 2 +- uv.lock | 53 +++++++++++++++++++++++++++++++++++------ 3 files changed, 48 insertions(+), 8 deletions(-) create mode 100644 changelog/3205.added.md diff --git a/changelog/3205.added.md b/changelog/3205.added.md new file mode 100644 index 000000000..dc72a1cf0 --- /dev/null +++ b/changelog/3205.added.md @@ -0,0 +1 @@ +- Added `RNNoiseFilter` for real-time noise suppression using the RNNoise neural network via the `pyrnnoise` library.
diff --git a/pyproject.toml b/pyproject.toml index b927bafe4..f3c3ae7c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -87,7 +87,7 @@ neuphonic = [ "pipecat-ai[websockets-base]" ] noisereduce = [ "noisereduce~=3.0.3" ] nvidia = [ "nvidia-riva-client~=2.21.1" ] openai = [ "pipecat-ai[websockets-base]" ] -rnnoise = [ "pyrnnoise~=0.2.0" ] +rnnoise = [ "pyrnnoise~=0.4.1" ] openpipe = [ "openpipe>=4.50.0,<6" ] openrouter = [] perplexity = [] diff --git a/uv.lock b/uv.lock index f9b826937..560620aee 100644 --- a/uv.lock +++ b/uv.lock @@ -315,6 +315,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/89/aa/ab0f7891a01eeb2d2e338ae8fecbe57fcebea1a24dbb64d45801bfab481d/attrs-24.3.0-py3-none-any.whl", hash = "sha256:ac96cd038792094f438ad1f6ff80837353805ac950cd2aa0e0625ef19850c308", size = 63397, upload-time = "2024-12-16T06:59:26.977Z" }, ] +[[package]] +name = "audiolab" +version = "0.4.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "av" }, + { name = "click" }, + { name = "humanize" }, + { name = "jinja2" }, + { name = "smart-open" }, + { name = "soundfile" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1c/62/343e39ff6560517ffb02c21796d155df4a019eea235ab7f86e46f8b10a73/audiolab-0.4.7.tar.gz", hash = "sha256:9a4618fd39601d5dd366f5dca3a0d23e6eacf5ee0d824ece2bc74ab15c8342b3", size = 31885, upload-time = "2025-12-19T07:40:13.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/ac/7dc51c1a0b15ba9162d9d506a7a9f2589f0258b2810dad0a193cc781b7d6/audiolab-0.4.7-py3-none-any.whl", hash = "sha256:52ea93f0c0950727f6ab79c90d95910ee6b3a608bbe1bcd33ce51530b3de064e", size = 50938, upload-time = "2025-12-19T07:40:12.098Z" }, +] + [[package]] name = "audioop-lts" version = "0.2.1" @@ -2050,6 +2067,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f0/0f/310fb31e39e2d734ccaa2c0fb981ee41f7bd5056ce9bc29b2248bd569169/humanfriendly-10.0-py2.py3-none-any.whl", hash = 
"sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477", size = 86794, upload-time = "2021-09-17T21:40:39.897Z" }, ] +[[package]] +name = "humanize" +version = "4.14.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b6/43/50033d25ad96a7f3845f40999b4778f753c3901a11808a584fed7c00d9f5/humanize-4.14.0.tar.gz", hash = "sha256:2fa092705ea640d605c435b1ca82b2866a1b601cdf96f076d70b79a855eba90d", size = 82939, upload-time = "2025-10-15T13:04:51.214Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c3/5b/9512c5fb6c8218332b530f13500c6ff5f3ce3342f35e0dd7be9ac3856fd3/humanize-4.14.0-py3-none-any.whl", hash = "sha256:d57701248d040ad456092820e6fde56c930f17749956ac47f4f655c0c547bfff", size = 132092, upload-time = "2025-10-15T13:04:49.404Z" }, +] + [[package]] name = "hume" version = "0.12.1" @@ -4157,7 +4183,7 @@ requires-dist = [ { name = "pygobject", marker = "extra == 'gstreamer'", specifier = "~=3.50.0" }, { name = "pyjwt", marker = "extra == 'livekit'", specifier = ">=2.10.1" }, { name = "pyloudnorm", specifier = "~=0.1.1" }, - { name = "pyrnnoise", marker = "extra == 'rnnoise'", specifier = "~=0.2.0" }, + { name = "pyrnnoise", marker = "extra == 'rnnoise'", specifier = "~=0.4.1" }, { name = "python-dotenv", marker = "extra == 'runner'", specifier = ">=1.0.0,<2.0.0" }, { name = "pyvips", extras = ["binary"], marker = "extra == 'moondream'", specifier = "~=3.0.0" }, { name = "resampy", specifier = "~=0.4.3" }, @@ -4780,18 +4806,19 @@ wheels = [ [[package]] name = "pyrnnoise" -version = "0.2.7" +version = "0.4.1" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "audiolab" }, + { name = "click" }, + { name = "matplotlib" }, { name = "numpy" }, - { name = "soundfile" }, - { name = "soxr" }, { name = "tqdm" }, ] wheels = [ - { url = 
"https://files.pythonhosted.org/packages/29/5a/d7433a898cc3c8cf9621f74d8671b052511811d9db263cf79cb224e463dc/pyrnnoise-0.2.7-py3-none-macosx_14_0_universal2.whl", hash = "sha256:fec5305080d2edfdc74b0f8beb8243a59e9a4a55a54db0ef8564510568a9eefe", size = 13366079, upload-time = "2024-10-02T12:26:46.199Z" }, - { url = "https://files.pythonhosted.org/packages/b5/a0/d624dfcbdb94a57047d17c923a2bfc7dfa170458b6a38f97868d89d6d284/pyrnnoise-0.2.7-py3-none-manylinux1_x86_64.whl", hash = "sha256:ce54addc6c4ff3c8a4c48e9e4d14640ca175c39b06a63b44da3f3a34d3ba8895", size = 13261826, upload-time = "2024-10-02T12:26:50.98Z" }, - { url = "https://files.pythonhosted.org/packages/ff/26/eed8b1dfd122c1523e4cafd0ff19bf4b59a79fe6a791a486a3fa3712e070/pyrnnoise-0.2.7-py3-none-win_amd64.whl", hash = "sha256:8451f98c715e2ce834a405162f7b14c30d730c4dd95ed3c5faecbb92257f8dd2", size = 13255252, upload-time = "2024-10-02T12:28:43.187Z" }, + { url = "https://files.pythonhosted.org/packages/59/49/7017ffa14230096e0271bd49dfd9ab60a32bfebe7e71399c2a0e38c6f859/pyrnnoise-0.4.1-py3-none-macosx_15_0_universal2.whl", hash = "sha256:c1fe407729190d0f84f3e3c9d9322ebbd33b27f3f5d9f7217379b71a4dd043e7", size = 13381833, upload-time = "2025-11-25T15:54:06.532Z" }, + { url = "https://files.pythonhosted.org/packages/8e/24/fb8b7bafb3dd9cbb46e134fa25c9597683c61b42c0133453fefeebeb0066/pyrnnoise-0.4.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:ddd39b45221b65fb235f882a0ce127513a1012d41c5b3ba9dc4e9e991b22c205", size = 13273307, upload-time = "2025-11-25T15:54:04.076Z" }, + { url = "https://files.pythonhosted.org/packages/7f/8e/eef9b2022fa5b9a111ba31d2f25ccd6e45da3daf16d20352e1fb18fd81dd/pyrnnoise-0.4.1-py3-none-win_amd64.whl", hash = "sha256:440e32359256eb7947e29fb080e800e984ba521fbe89a8b0b2f5dc196965e441", size = 13267076, upload-time = "2025-11-25T15:54:37.547Z" }, ] [[package]] @@ -5740,6 +5767,18 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] +[[package]] +name = "smart-open" +version = "7.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/67/9a/0a7acb748b86e2922982366d780ca4b16c33f7246fa5860d26005c97e4f3/smart_open-7.5.0.tar.gz", hash = "sha256:f394b143851d8091011832ac8113ea4aba6b92e6c35f6e677ddaaccb169d7cb9", size = 53920, upload-time = "2025-11-08T21:38:40.698Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ad/95/bc978be7ea0babf2fb48a414b6afaad414c6a9e8b1eafc5b8a53c030381a/smart_open-7.5.0-py3-none-any.whl", hash = "sha256:87e695c5148bbb988f15cec00971602765874163be85acb1c9fb8abc012e6599", size = 63940, upload-time = "2025-11-08T21:38:39.024Z" }, +] + [[package]] name = "smithy-aws-core" version = "0.2.0"