Merge pull request #3391 from pipecat-ai/filipi/krisp_followup_improvements
Krisp VIVA follow-up improvements
This commit is contained in:
1
changelog/3391.added.md
Normal file
1
changelog/3391.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `KrispVivaTurn` analyzer for end-of-turn detection using the Krisp VIVA SDK (requires `krisp_audio`).
|
||||
1
changelog/3391.changed.md
Normal file
1
changelog/3391.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Improved Krisp SDK management to allow `KrispVivaTurn` and `KrispVivaFilter` to share a single SDK instance within the same process.
|
||||
1
changelog/3391.fixed.md
Normal file
1
changelog/3391.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed potential memory leaks and initialization issues in `KrispVivaFilter` by improving SDK lifecycle management.
|
||||
@@ -4,6 +4,23 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Interruptible bot with Krisp VIVA noise filtering and turn detection.
|
||||
|
||||
This example demonstrates a conversational bot with:
|
||||
- Krisp VIVA noise reduction on incoming audio
|
||||
- Krisp VIVA Turn detection for natural interruptions
|
||||
- Voice activity detection (VAD)
|
||||
|
||||
Required environment variables:
|
||||
- KRISP_VIVA_FILTER_MODEL_PATH: Path to the Krisp noise filter model file (.kef)
|
||||
- KRISP_VIVA_TURN_MODEL_PATH: Path to the Krisp turn detection model file (.kef)
|
||||
- DEEPGRAM_API_KEY: Deepgram API key for STT/TTS
|
||||
- OPENAI_API_KEY: OpenAI API key for LLM
|
||||
|
||||
Optional environment variables:
|
||||
- KRISP_NOISE_SUPPRESSION_LEVEL: Noise suppression level 0-100 (default: 100)
|
||||
Higher values = more aggressive noise reduction
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
@@ -11,7 +28,7 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.filters.krisp_viva_filter import KrispVivaFilter
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.turn.krisp_viva_turn import KrispTurnParams, KrispVivaTurn
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
@@ -82,7 +99,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=KrispVivaTurn())]
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
@@ -132,3 +132,7 @@ class BaseTurnAnalyzer(ABC):
|
||||
def clear(self):
|
||||
"""Reset the turn analyzer to its initial state."""
|
||||
pass
|
||||
|
||||
async def cleanup(self):
|
||||
"""Cleanup the turn analyzer."""
|
||||
pass
|
||||
|
||||
@@ -133,7 +133,7 @@ class KrispVivaTurn(BaseTurnAnalyzer):
|
||||
self._sdk_acquired = False
|
||||
raise
|
||||
|
||||
def __del__(self):
|
||||
async def cleanup(self):
|
||||
"""Release SDK reference when the analyzer is cleaned up."""
|
||||
if self._sdk_acquired:
|
||||
try:
|
||||
@@ -192,8 +192,7 @@ class KrispVivaTurn(BaseTurnAnalyzer):
|
||||
# Create session when sample rate is set
|
||||
try:
|
||||
self._tt_session = self._create_tt_session(self._sample_rate)
|
||||
# Clear buffer when sample rate changes
|
||||
self._audio_buffer.clear()
|
||||
self.clear()
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create turn detection session: {e}", exc_info=True)
|
||||
self._tt_session = None
|
||||
@@ -310,7 +309,7 @@ class KrispVivaTurn(BaseTurnAnalyzer):
|
||||
# confirms with sufficient confidence
|
||||
if self._speech_triggered and prob >= self._params.threshold:
|
||||
state = EndOfTurnState.COMPLETE
|
||||
self._clear(state)
|
||||
self.clear()
|
||||
break
|
||||
|
||||
# Store the last state for analyze_end_of_turn()
|
||||
@@ -336,18 +335,6 @@ class KrispVivaTurn(BaseTurnAnalyzer):
|
||||
|
||||
def clear(self):
|
||||
"""Reset the turn analyzer to its initial state."""
|
||||
self._clear(EndOfTurnState.COMPLETE)
|
||||
|
||||
def _clear(self, turn_state: EndOfTurnState):
|
||||
"""Clear internal state based on turn completion status.
|
||||
|
||||
Args:
|
||||
turn_state: The end-of-turn state to use for clearing.
|
||||
"""
|
||||
# If the state is still incomplete, keep the _speech_triggered as True
|
||||
self._speech_triggered = turn_state == EndOfTurnState.INCOMPLETE
|
||||
# Clear audio buffer on turn completion
|
||||
if turn_state == EndOfTurnState.COMPLETE:
|
||||
self._audio_buffer.clear()
|
||||
# Reset last state when clearing
|
||||
self._speech_triggered = False
|
||||
self._audio_buffer.clear()
|
||||
self._last_state = EndOfTurnState.INCOMPLETE
|
||||
|
||||
@@ -73,6 +73,7 @@ class TurnAnalyzerUserTurnStopStrategy(BaseUserTurnStopStrategy):
|
||||
async def cleanup(self):
|
||||
"""Cleanup the strategy."""
|
||||
await super().cleanup()
|
||||
await self._turn_analyzer.cleanup()
|
||||
if self._task:
|
||||
await self.task_manager.cancel_task(self._task)
|
||||
self._task = None
|
||||
|
||||
Reference in New Issue
Block a user