Merge pull request #3391 from pipecat-ai/filipi/krisp_followup_improvements

Krisp VIVA follow-up improvements
This commit is contained in:
Filipi da Silva Fuchter
2026-01-09 10:39:23 -05:00
committed by GitHub
7 changed files with 32 additions and 20 deletions

1
changelog/3391.added.md Normal file
View File

@@ -0,0 +1 @@
- Added `KrispVivaTurn` analyzer for end-of-turn detection using the Krisp VIVA SDK (requires `krisp_audio`).

View File

@@ -0,0 +1 @@
- Improved Krisp SDK management to allow `KrispVivaTurn` and `KrispVivaFilter` to share a single SDK instance within the same process.

1
changelog/3391.fixed.md Normal file
View File

@@ -0,0 +1 @@
- Fixed potential memory leaks and initialization issues in `KrispVivaFilter` by improving SDK lifecycle management.

View File

@@ -4,6 +4,23 @@
# SPDX-License-Identifier: BSD 2-Clause License
#
"""Interruptible bot with Krisp VIVA noise filtering and turn detection.
This example demonstrates a conversational bot with:
- Krisp VIVA noise reduction on incoming audio
- Krisp VIVA Turn detection for natural interruptions
- Voice activity detection (VAD)
Required environment variables:
- KRISP_VIVA_FILTER_MODEL_PATH: Path to the Krisp noise filter model file (.kef)
- KRISP_VIVA_TURN_MODEL_PATH: Path to the Krisp turn detection model file (.kef)
- DEEPGRAM_API_KEY: Deepgram API key for STT/TTS
- OPENAI_API_KEY: OpenAI API key for LLM
Optional environment variables:
- KRISP_NOISE_SUPPRESSION_LEVEL: Noise suppression level 0-100 (default: 100)
Higher values = more aggressive noise reduction
"""
import os
@@ -11,7 +28,7 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.filters.krisp_viva_filter import KrispVivaFilter
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
from pipecat.audio.turn.krisp_viva_turn import KrispTurnParams, KrispVivaTurn
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.audio.vad.vad_analyzer import VADParams
from pipecat.frames.frames import LLMRunFrame
@@ -82,7 +99,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
context,
user_params=LLMUserAggregatorParams(
user_turn_strategies=UserTurnStrategies(
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=KrispVivaTurn())]
),
),
)

View File

@@ -132,3 +132,7 @@ class BaseTurnAnalyzer(ABC):
def clear(self):
"""Reset the turn analyzer to its initial state."""
pass
async def cleanup(self):
"""Cleanup the turn analyzer."""
pass

View File

@@ -133,7 +133,7 @@ class KrispVivaTurn(BaseTurnAnalyzer):
self._sdk_acquired = False
raise
def __del__(self):
async def cleanup(self):
"""Release SDK reference when analyzer is destroyed."""
if self._sdk_acquired:
try:
@@ -192,8 +192,7 @@ class KrispVivaTurn(BaseTurnAnalyzer):
# Create session when sample rate is set
try:
self._tt_session = self._create_tt_session(self._sample_rate)
# Clear buffer when sample rate changes
self._audio_buffer.clear()
self.clear()
except Exception as e:
logger.error(f"Failed to create turn detection session: {e}", exc_info=True)
self._tt_session = None
@@ -310,7 +309,7 @@ class KrispVivaTurn(BaseTurnAnalyzer):
# confirms with sufficient confidence
if self._speech_triggered and prob >= self._params.threshold:
state = EndOfTurnState.COMPLETE
self._clear(state)
self.clear()
break
# Store the last state for analyze_end_of_turn()
@@ -336,18 +335,6 @@ class KrispVivaTurn(BaseTurnAnalyzer):
def clear(self):
"""Reset the turn analyzer to its initial state."""
self._clear(EndOfTurnState.COMPLETE)
def _clear(self, turn_state: EndOfTurnState):
"""Clear internal state based on turn completion status.
Args:
turn_state: The end-of-turn state to use for clearing.
"""
# If the state is still incomplete, keep the _speech_triggered as True
self._speech_triggered = turn_state == EndOfTurnState.INCOMPLETE
# Clear audio buffer on turn completion
if turn_state == EndOfTurnState.COMPLETE:
self._audio_buffer.clear()
# Reset last state when clearing
self._speech_triggered = False
self._audio_buffer.clear()
self._last_state = EndOfTurnState.INCOMPLETE

View File

@@ -73,6 +73,7 @@ class TurnAnalyzerUserTurnStopStrategy(BaseUserTurnStopStrategy):
async def cleanup(self):
"""Cleanup the strategy."""
await super().cleanup()
await self._turn_analyzer.cleanup()
if self._task:
await self.task_manager.cancel_task(self._task)
self._task = None