Remove vad

intervention processor
2025-08-14 11:17:22 +08:00 · 2025-08-14 11:16:54 +08:00 · 2025-08-14 11:15:55 +08:00
1 changed files with 78 additions and 0 deletions
--- a/examples/foundational/07d-interruptible-elevenlabs.py
+++ b/examples/foundational/07d-interruptible-elevenlabs.py
@@ -5,16 +5,27 @@
 #
 import asyncio
 import os
 from dotenv import load_dotenv
 from loguru import logger
 from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.audio.vad.vad_analyzer import VADParams
 from pipecat.frames.frames import (
    BotStartedSpeakingFrame,
    Frame,
    LLMFullResponseStartFrame,
    LLMTextFrame,
    TranscriptionFrame,
    TTSSpeakFrame,
 )
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
 from pipecat.services.deepgram.stt import DeepgramSTTService
@@ -49,6 +60,65 @@ transport_params = {
 }
 class TranscriptionLogger(FrameProcessor):
    """Custom processor that logs transcription frames."""
    async def process_frame(self, frame, direction):
        await super().process_frame(frame, direction)
        # Only log TranscriptionFrame objects
        if isinstance(frame, TranscriptionFrame):
            logger.info(f"[TRANSCRIPTION]: {frame.text}")
        # Always pass the frame through to maintain pipeline flow
        await self.push_frame(frame, direction)
 class InterventionProcessor(FrameProcessor):
    """Custom processor that logs LLM response frames."""
    def __init__(self):
        super().__init__()
        self._timer_task = None
    async def process_frame(self, frame, direction):
        await super().process_frame(frame, direction)
        # Log LLM response start frames
        if isinstance(frame, LLMFullResponseStartFrame):
            logger.info(f"[LLM_START]: Starting LLM response")
            # Cancel any existing timer
            if self._timer_task and not self._timer_task.done():
                self._timer_task.cancel()
            # Start a new 500ms timer
            self._timer_task = asyncio.create_task(self._log_after_delay())
        # Cancel timer if bot started speaking before 500ms
        elif isinstance(frame, BotStartedSpeakingFrame):
            logger.info(f"[BOT_SPEAKING]: Bot started speaking, canceling intervention timer")
            if self._timer_task and not self._timer_task.done():
                self._timer_task.cancel()
        # Log LLM text frames
        elif isinstance(frame, LLMTextFrame):
            logger.info(f"[LLM_TEXT]: {frame.text}")
        # Always pass the frame through to maintain pipeline flow
        await self.push_frame(frame, direction)
    async def _log_after_delay(self):
        """Log a message after 500ms delay."""
        try:
            await asyncio.sleep(0.5)  # 500ms
            logger.info(f"500ms passed since LLMFullResponseStartFrame")
            await self.queue_frame(TTSSpeakFrame("um..."))
        except asyncio.CancelledError:
            # Timer was cancelled, which is fine
            pass
 async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info(f"Starting bot")
@@ -71,13 +141,21 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    context = OpenAILLMContext(messages)
    context_aggregator = llm.create_context_aggregator(context)
    # Create transcription logger instance
    transcription_logger = TranscriptionLogger()
    # Create LLM logger instance
    intervention = InterventionProcessor()
    pipeline = Pipeline(
        [
            transport.input(),  # Transport user input
            stt,
            transcription_logger,  # Log transcription frames
            context_aggregator.user(),  # User responses
            llm,  # LLM
            tts,  # TTS
            intervention,  # Log LLM response frames
            transport.output(),  # Transport bot output
            context_aggregator.assistant(),  # Assistant spoken responses
        ]
Author	SHA1	Message	Date
James Hush	fdf0652141	Remove vad	2025-08-14 11:17:22 +08:00
James Hush	237c400f2d	Remove vad	2025-08-14 11:16:54 +08:00
James Hush	b6afce2a92	intervention processor	2025-08-14 11:15:55 +08:00