66 lines
1.9 KiB
Python
66 lines
1.9 KiB
Python
#
|
||
# Copyright (c) 2024–2025, Daily
|
||
#
|
||
# SPDX-License-Identifier: BSD 2-Clause License
|
||
#
|
||
|
||
import asyncio
|
||
import sys
|
||
from typing import Tuple
|
||
|
||
from dotenv import load_dotenv
|
||
from loguru import logger
|
||
from select_audio_device import AudioDevice, run_device_selector
|
||
|
||
from pipecat.frames.frames import Frame, TranscriptionFrame
|
||
from pipecat.pipeline.pipeline import Pipeline
|
||
from pipecat.pipeline.runner import PipelineRunner
|
||
from pipecat.pipeline.task import PipelineTask
|
||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||
from pipecat.services.whisper import Model, WhisperSTTService
|
||
from pipecat.transports.local.audio import LocalAudioTransport, LocalAudioTransportParams
|
||
|
||
# Pull configuration from a .env file; override=True is requested so those
# values win over variables already present in the process environment.
load_dotenv(override=True)

# Swap the logger's handler 0 for a stderr sink that emits DEBUG and above.
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
|
||
|
||
|
||
class TranscriptionLogger(FrameProcessor):
    """Frame processor that prints the text of each transcription frame.

    Every frame, transcription or not, is forwarded unchanged after being
    inspected so the rest of the pipeline keeps receiving it.
    """

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Print ``frame.text`` for transcription frames, then forward the frame.

        Args:
            frame: The frame travelling through the pipeline.
            direction: The direction in which the frame is travelling.
        """
        await super().process_frame(frame, direction)

        if isinstance(frame, TranscriptionFrame):
            print(f"Transcription: {frame.text}")

        # Without this push the processor swallows every frame it receives,
        # so control frames would never travel past it and the pipeline
        # could not shut down cleanly.
        await self.push_frame(frame, direction)
|
||
|
||
|
||
async def main(input_device: int, output_device: int):
    """Transcribe audio from a local input device with Whisper.

    Builds a pipeline of local audio input -> Whisper STT ->
    TranscriptionLogger, wraps it in a task and awaits the runner.

    Args:
        input_device: Index of the audio input device to capture from.
        output_device: Index of the audio output device; it is forwarded to
            the transport parameters even though audio output is disabled.
    """
    transport_params = LocalAudioTransportParams(
        audio_in_enabled=True,
        audio_out_enabled=False,
        input_device_index=input_device,
        output_device_index=output_device,
    )
    transport = LocalAudioTransport(transport_params)

    # NOTE(review): device="cuda" presumably needs a CUDA-capable GPU — confirm.
    stt = WhisperSTTService(device="cuda", model=Model.LARGE, no_speech_prob=0.3)

    transcription_logger = TranscriptionLogger()

    processors = [transport.input(), stt, transcription_logger]
    pipeline = Pipeline(processors)

    task = PipelineTask(pipeline)

    # SIGINT handling is enabled everywhere except on Windows.
    runner = PipelineRunner(handle_sigint=sys.platform != "win32")

    await asyncio.gather(runner.run(task))
|
||
|
||
|
||
if __name__ == "__main__":
    # First event loop: run the textual device-selector app and collect
    # the devices that were chosen.
    selection: Tuple[AudioDevice, AudioDevice, int] = asyncio.run(run_device_selector())
    input_device = selection[0]
    output_device = selection[1]

    # Second event loop: run the transcription pipeline on those devices.
    asyncio.run(main(input_device.index, output_device.index))
|