Files
pipecat/examples/local-input-select-stt/bot.py
2025-03-30 16:21:00 -07:00

66 lines
1.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import sys
from typing import Tuple
from dotenv import load_dotenv
from loguru import logger
from select_audio_device import AudioDevice, run_device_selector
from pipecat.frames.frames import Frame, TranscriptionFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.services.whisper.stt import Model, WhisperSTTService
from pipecat.transports.local.audio import LocalAudioTransport, LocalAudioTransportParams
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
class TranscriptionLogger(FrameProcessor):
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)
if isinstance(frame, TranscriptionFrame):
print(f"Transcription: {frame.text}")
async def main(input_device: int, output_device: int):
transport = LocalAudioTransport(
LocalAudioTransportParams(
audio_in_enabled=True,
audio_out_enabled=False,
input_device_index=input_device,
output_device_index=output_device,
)
)
stt = WhisperSTTService(device="cuda", model=Model.LARGE, no_speech_prob=0.3)
tl = TranscriptionLogger()
pipeline = Pipeline([transport.input(), stt, tl])
task = PipelineTask(pipeline)
runner = PipelineRunner(handle_sigint=False if sys.platform == "win32" else True)
await asyncio.gather(runner.run(task))
if __name__ == "__main__":
res: Tuple[AudioDevice, AudioDevice, int] = asyncio.run(
run_device_selector() # runs the textual app that allows to select input device
)
asyncio.run(main(res[0].index, res[1].index))