diff --git a/changelog/3391.added.md b/changelog/3391.added.md new file mode 100644 index 000000000..7dfaa9a2f --- /dev/null +++ b/changelog/3391.added.md @@ -0,0 +1 @@ +- Added `KrispVivaTurn` analyzer for end-of-turn detection using the Krisp VIVA SDK (requires `krisp_audio`). diff --git a/changelog/3391.changed.md b/changelog/3391.changed.md new file mode 100644 index 000000000..fb12beac0 --- /dev/null +++ b/changelog/3391.changed.md @@ -0,0 +1 @@ +- Improved Krisp SDK management to allow `KrispVivaTurn` and `KrispVivaFilter` to share a single SDK instance within the same process. diff --git a/changelog/3391.fixed.md b/changelog/3391.fixed.md new file mode 100644 index 000000000..95c14ebd5 --- /dev/null +++ b/changelog/3391.fixed.md @@ -0,0 +1 @@ +- Fixed potential memory leaks and initialization issues in `KrispVivaFilter` by improving SDK lifecycle management. \ No newline at end of file diff --git a/examples/foundational/07p-interruptible-krisp-viva.py b/examples/foundational/07p-interruptible-krisp-viva.py index 57dc8beb5..ca11ef90e 100644 --- a/examples/foundational/07p-interruptible-krisp-viva.py +++ b/examples/foundational/07p-interruptible-krisp-viva.py @@ -4,6 +4,23 @@ # SPDX-License-Identifier: BSD 2-Clause License # +"""Interruptible bot with Krisp VIVA noise filtering and turn detection. 
+ +This example demonstrates a conversational bot with: +- Krisp VIVA noise reduction on incoming audio +- Krisp VIVA Turn detection for natural interruptions +- Voice activity detection (VAD) + +Required environment variables: +- KRISP_VIVA_FILTER_MODEL_PATH: Path to the Krisp noise filter model file (.kef) +- KRISP_VIVA_TURN_MODEL_PATH: Path to the Krisp turn detection model file (.kef) +- DEEPGRAM_API_KEY: Deepgram API key for STT/TTS +- OPENAI_API_KEY: OpenAI API key for LLM + +Optional environment variables: +- KRISP_NOISE_SUPPRESSION_LEVEL: Noise suppression level 0-100 (default: 100) + Higher values = more aggressive noise reduction +""" import os @@ -11,7 +28,7 @@ from dotenv import load_dotenv from loguru import logger from pipecat.audio.filters.krisp_viva_filter import KrispVivaFilter -from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 +from pipecat.audio.turn.krisp_viva_turn import KrispTurnParams, KrispVivaTurn from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams from pipecat.frames.frames import LLMRunFrame @@ -82,7 +99,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): context, user_params=LLMUserAggregatorParams( user_turn_strategies=UserTurnStrategies( - stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=KrispVivaTurn())] ), ), ) diff --git a/src/pipecat/audio/turn/base_turn_analyzer.py b/src/pipecat/audio/turn/base_turn_analyzer.py index 942214738..8d4b5683a 100644 --- a/src/pipecat/audio/turn/base_turn_analyzer.py +++ b/src/pipecat/audio/turn/base_turn_analyzer.py @@ -132,3 +132,7 @@ class BaseTurnAnalyzer(ABC): def clear(self): """Reset the turn analyzer to its initial state.""" pass + + async def cleanup(self): + """Clean up the turn analyzer.""" + pass diff --git a/src/pipecat/audio/turn/krisp_viva_turn.py 
b/src/pipecat/audio/turn/krisp_viva_turn.py index dbd6d6dcd..04e59421f 100644 --- a/src/pipecat/audio/turn/krisp_viva_turn.py +++ b/src/pipecat/audio/turn/krisp_viva_turn.py @@ -133,7 +133,7 @@ class KrispVivaTurn(BaseTurnAnalyzer): self._sdk_acquired = False raise - def __del__(self): - """Release SDK reference when analyzer is destroyed.""" + async def cleanup(self): + """Release the SDK reference when the analyzer is cleaned up.""" if self._sdk_acquired: try: @@ -192,8 +192,7 @@ class KrispVivaTurn(BaseTurnAnalyzer): # Create session when sample rate is set try: self._tt_session = self._create_tt_session(self._sample_rate) - # Clear buffer when sample rate changes - self._audio_buffer.clear() + self.clear() except Exception as e: logger.error(f"Failed to create turn detection session: {e}", exc_info=True) self._tt_session = None @@ -310,7 +309,7 @@ class KrispVivaTurn(BaseTurnAnalyzer): # confirms with sufficient confidence if self._speech_triggered and prob >= self._params.threshold: state = EndOfTurnState.COMPLETE - self._clear(state) + self.clear() break # Store the last state for analyze_end_of_turn() @@ -336,18 +335,6 @@ class KrispVivaTurn(BaseTurnAnalyzer): def clear(self): """Reset the turn analyzer to its initial state.""" - self._clear(EndOfTurnState.COMPLETE) - - def _clear(self, turn_state: EndOfTurnState): - """Clear internal state based on turn completion status. - - Args: - turn_state: The end-of-turn state to use for clearing. 
- """ - # If the state is still incomplete, keep the _speech_triggered as True - self._speech_triggered = turn_state == EndOfTurnState.INCOMPLETE - # Clear audio buffer on turn completion - if turn_state == EndOfTurnState.COMPLETE: - self._audio_buffer.clear() - # Reset last state when clearing + self._speech_triggered = False + self._audio_buffer.clear() self._last_state = EndOfTurnState.INCOMPLETE diff --git a/src/pipecat/turns/user_stop/turn_analyzer_user_turn_stop_strategy.py b/src/pipecat/turns/user_stop/turn_analyzer_user_turn_stop_strategy.py index 16802795a..8db17e1ef 100644 --- a/src/pipecat/turns/user_stop/turn_analyzer_user_turn_stop_strategy.py +++ b/src/pipecat/turns/user_stop/turn_analyzer_user_turn_stop_strategy.py @@ -73,6 +73,7 @@ class TurnAnalyzerUserTurnStopStrategy(BaseUserTurnStopStrategy): async def cleanup(self): """Cleanup the strategy.""" await super().cleanup() + await self._turn_analyzer.cleanup() if self._task: await self.task_manager.cancel_task(self._task) self._task = None