From bc5f098aaa4ce4fa8823aaab3a72f6ed5f9bcc4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Mon, 28 Apr 2025 09:28:26 -0700 Subject: [PATCH] examples: update local-input-select-stt --- examples/local-input-select-stt/bot.py | 17 +++-------------- .../local-input-select-stt/requirements.txt | 6 +----- .../select_audio_device.py | 6 ++++++ 3 files changed, 10 insertions(+), 19 deletions(-) diff --git a/examples/local-input-select-stt/bot.py b/examples/local-input-select-stt/bot.py index 95d03e44f..26cbbdfa3 100644 --- a/examples/local-input-select-stt/bot.py +++ b/examples/local-input-select-stt/bot.py @@ -12,11 +12,10 @@ from dotenv import load_dotenv from loguru import logger from select_audio_device import AudioDevice, run_device_selector -from pipecat.frames.frames import Frame, TranscriptionFrame +from pipecat.observers.loggers.transcription_log_observer import TranscriptionLogObserver from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineTask -from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.services.whisper.stt import Model, WhisperSTTService from pipecat.transports.local.audio import LocalAudioTransport, LocalAudioTransportParams @@ -26,14 +25,6 @@ logger.remove(0) logger.add(sys.stderr, level="DEBUG") -class TranscriptionLogger(FrameProcessor): - async def process_frame(self, frame: Frame, direction: FrameDirection): - await super().process_frame(frame, direction) - - if isinstance(frame, TranscriptionFrame): - print(f"Transcription: {frame.text}") - - async def main(input_device: int, output_device: int): transport = LocalAudioTransport( LocalAudioTransportParams( @@ -46,11 +37,9 @@ async def main(input_device: int, output_device: int): stt = WhisperSTTService(device="cuda", model=Model.LARGE, no_speech_prob=0.3) - tl = TranscriptionLogger() + pipeline = Pipeline([transport.input(), stt]) - pipeline = Pipeline([transport.input(), stt, tl]) - - task = PipelineTask(pipeline) + task = PipelineTask(pipeline, observers=[TranscriptionLogObserver()]) runner = PipelineRunner(handle_sigint=False if sys.platform == "win32" else True) diff --git a/examples/local-input-select-stt/requirements.txt b/examples/local-input-select-stt/requirements.txt index 9e2f3e592..16bf20994 100644 --- a/examples/local-input-select-stt/requirements.txt +++ b/examples/local-input-select-stt/requirements.txt @@ -1,8 +1,4 @@ ---extra-index-url https://download.pytorch.org/whl/cu124 -torch==2.5.0+cu124 -torchvision -torchaudio -pipecat[whisper, openai] +pipecat-ai[whisper, openai] textual==1.0.0 pydantic-settings==2.7.1 pyaudio==0.2.14 diff --git a/examples/local-input-select-stt/select_audio_device.py b/examples/local-input-select-stt/select_audio_device.py index 2993eeafc..b6875e407 100644 --- a/examples/local-input-select-stt/select_audio_device.py +++ b/examples/local-input-select-stt/select_audio_device.py @@ -1,3 +1,9 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + from typing import List, Optional, Tuple import pyaudio