Files
pipecat/examples/local-input-select-stt/bot.py
Julien Le Bourg 77fb63372a fix: incorrectly changed the base type in my last pull request for L… (#1184)
* fix: incorrectly changed the base type in my last pull request for  LocalAudioTransport

* update examples to use the new LocalTransportParams

* add local device select example
2025-02-11 08:35:57 -08:00

66 lines
1.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import sys
from typing import Tuple
from dotenv import load_dotenv
from loguru import logger
from select_audio_device import AudioDevice, run_device_selector
from pipecat.frames.frames import Frame, TranscriptionFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.services.whisper import Model, WhisperSTTService
from pipecat.transports.local.audio import LocalAudioTransport, LocalTransportParams
# Load configuration from a .env file before anything reads the environment.
# NOTE(review): override=True presumably lets .env values replace variables
# that are already set in the process environment — confirm against the
# python-dotenv documentation.
load_dotenv(override=True)
# Replace loguru's handler with id 0 by a stderr sink that emits DEBUG and
# above. NOTE(review): id 0 is expected to be loguru's pre-configured default
# handler; this call fails if that handler has already been removed.
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
class TranscriptionLogger(FrameProcessor):
    """Print the text of every transcription frame that passes through.

    Every frame, transcriptions included, is forwarded unchanged in the
    direction it was travelling so that the rest of the pipeline still
    receives it.
    """

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Print ``frame`` if it is a transcription, then pass it along.

        Args:
            frame: The frame delivered to this processor.
            direction: The direction in which the frame is travelling.
        """
        # Give the base class the frame first, as the original code did.
        await super().process_frame(frame, direction)

        if isinstance(frame, TranscriptionFrame):
            print(f"Transcription: {frame.text}")

        # A processor that never pushes frames swallows them, so nothing
        # after it in the pipeline would see any frame at all. Forward
        # everything, not only the frames handled above.
        await self.push_frame(frame, direction)
async def main(input_device: int, output_device: int) -> None:
    """Transcribe audio from a local input device and print the results.

    Builds a pipeline of local audio input -> Whisper speech-to-text ->
    ``TranscriptionLogger`` and runs it until the runner returns.

    Args:
        input_device: Index of the audio device to capture from.
        output_device: Index of the audio output device. It is passed to the
            transport, but audio output itself is disabled here.
    """
    transport = LocalAudioTransport(
        LocalTransportParams(
            audio_in_enabled=True,
            audio_out_enabled=False,
            input_device_index=input_device,
            output_device_index=output_device,
        )
    )

    # NOTE(review): device="cuda" presumably requires a CUDA-capable GPU;
    # confirm before running this example on other hardware.
    stt = WhisperSTTService(device="cuda", model=Model.LARGE, no_speech_prob=0.3)
    tl = TranscriptionLogger()

    pipeline = Pipeline([transport.input(), stt, tl])
    task = PipelineTask(pipeline)

    # The runner's SIGINT handling is switched off on Windows only.
    runner = PipelineRunner(handle_sigint=sys.platform != "win32")

    # There is a single awaitable, so await it directly instead of wrapping
    # it in asyncio.gather().
    await runner.run(task)
if __name__ == "__main__":
    # First event loop: run the textual app that lets the user pick devices.
    selection: Tuple[AudioDevice, AudioDevice, int] = asyncio.run(run_device_selector())

    # The first two entries are the chosen input and output devices.
    chosen_input = selection[0]
    chosen_output = selection[1]

    # Second event loop: run the transcription pipeline on those devices.
    asyncio.run(main(chosen_input.index, chosen_output.index))