55 lines
1.6 KiB
Python
55 lines
1.6 KiB
Python
#
|
||
# Copyright (c) 2024–2025, Daily
|
||
#
|
||
# SPDX-License-Identifier: BSD 2-Clause License
|
||
#
|
||
|
||
import asyncio
|
||
import sys
|
||
from typing import Tuple
|
||
|
||
from dotenv import load_dotenv
|
||
from loguru import logger
|
||
from select_audio_device import AudioDevice, run_device_selector
|
||
|
||
from pipecat.observers.loggers.transcription_log_observer import TranscriptionLogObserver
|
||
from pipecat.pipeline.pipeline import Pipeline
|
||
from pipecat.pipeline.runner import PipelineRunner
|
||
from pipecat.pipeline.task import PipelineTask
|
||
from pipecat.services.whisper.stt import Model, WhisperSTTService
|
||
from pipecat.transports.local.audio import LocalAudioTransport, LocalAudioTransportParams
|
||
|
||
# Pull settings from a local .env file before anything else reads the
# environment (override=True is passed so .env values are applied even when
# the variable already exists -- see python-dotenv docs to confirm).
load_dotenv(override=True)

# Replace loguru handler id 0 with an explicit stderr sink at DEBUG level.
# NOTE(review): id 0 is presumably loguru's pre-installed default handler;
# remove(0) will raise if that handler has already been removed elsewhere.
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
|
||
|
||
|
||
async def main(input_device: int, output_device: int) -> None:
    """Run a local-audio -> Whisper speech-to-text pipeline until it ends.

    Builds a ``LocalAudioTransport`` with audio input enabled and audio
    output disabled, feeds its input into a ``WhisperSTTService`` and runs
    the resulting pipeline with a ``TranscriptionLogObserver`` attached.

    Args:
        input_device: Index of the audio input device handed to the
            transport as ``input_device_index``.
        output_device: Index of the audio output device handed to the
            transport as ``output_device_index``. Audio output is disabled
            in this script, so the value is forwarded but playback is off.
    """
    transport_params = LocalAudioTransportParams(
        audio_in_enabled=True,
        audio_out_enabled=False,
        input_device_index=input_device,
        output_device_index=output_device,
    )
    transport = LocalAudioTransport(transport_params)

    # NOTE(review): device="cuda" presumably requires a CUDA-capable GPU, and
    # no_speech_prob=0.3 looks like a threshold for discarding non-speech
    # segments -- confirm both against the WhisperSTTService documentation.
    stt = WhisperSTTService(device="cuda", model=Model.LARGE, no_speech_prob=0.3)

    pipeline = Pipeline([transport.input(), stt])
    task = PipelineTask(pipeline, observers=[TranscriptionLogObserver()])

    # The runner's SIGINT handling is switched off on Windows only
    # (presumably because asyncio signal handlers are unavailable there).
    runner = PipelineRunner(handle_sigint=sys.platform != "win32")

    # Only one awaitable is run and its result is unused, so it is awaited
    # directly rather than wrapped in asyncio.gather().
    await runner.run(task)
|
||
|
||
|
||
if __name__ == "__main__":
    # First event loop: run the Textual device-selector app and collect the
    # user's choice. Only the first two entries of the result are used below.
    selection: Tuple[AudioDevice, AudioDevice, int] = asyncio.run(run_device_selector())

    chosen_input = selection[0]
    chosen_output = selection[1]

    # Second event loop: run the transcription pipeline on the chosen devices.
    asyncio.run(main(chosen_input.index, chosen_output.index))
|