Merge pull request #1680 from pipecat-ai/aleix/local-input-select-stt-update
examples: update local-input-select-stt
This commit is contained in:
30
.gitignore
vendored
30
.gitignore
vendored
@@ -7,7 +7,7 @@ venv
|
||||
/.idea
|
||||
#*#
|
||||
|
||||
# Distribution / packaging
|
||||
# Distribution / Packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
@@ -30,24 +30,24 @@ MANIFEST
|
||||
.env
|
||||
fly.toml
|
||||
|
||||
# Example files
|
||||
pipecat/examples/twilio-chatbot/templates/streams.xml
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/node_modules/
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/.expo/
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/dist/
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/npm-debug.*
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.jks
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.p8
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.p12
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.key
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.mobileprovision
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.orig.*
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/web-build/
|
||||
# Examples
|
||||
examples/telnyx-chatbot/templates/streams.xml
|
||||
examples/twilio-chatbot/templates/streams.xml
|
||||
examples/**/node_modules/
|
||||
examples/**/.expo/
|
||||
examples/**/dist/
|
||||
examples/**/npm-debug.*
|
||||
examples/**/*.jks
|
||||
examples/**/*.p8
|
||||
examples/**/*.p12
|
||||
examples/**/*.key
|
||||
examples/**/*.mobileprovision
|
||||
examples/**/*.orig.*
|
||||
examples/**/web-build/
|
||||
|
||||
# macOS
|
||||
.DS_Store
|
||||
|
||||
|
||||
# Documentation
|
||||
docs/api/_build/
|
||||
docs/api/api
|
||||
@@ -12,11 +12,10 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from select_audio_device import AudioDevice, run_device_selector
|
||||
|
||||
from pipecat.frames.frames import Frame, TranscriptionFrame
|
||||
from pipecat.observers.loggers.transcription_log_observer import TranscriptionLogObserver
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.whisper.stt import Model, WhisperSTTService
|
||||
from pipecat.transports.local.audio import LocalAudioTransport, LocalAudioTransportParams
|
||||
|
||||
@@ -26,14 +25,6 @@ logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class TranscriptionLogger(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, TranscriptionFrame):
|
||||
print(f"Transcription: {frame.text}")
|
||||
|
||||
|
||||
async def main(input_device: int, output_device: int):
|
||||
transport = LocalAudioTransport(
|
||||
LocalAudioTransportParams(
|
||||
@@ -46,11 +37,9 @@ async def main(input_device: int, output_device: int):
|
||||
|
||||
stt = WhisperSTTService(device="cuda", model=Model.LARGE, no_speech_prob=0.3)
|
||||
|
||||
tl = TranscriptionLogger()
|
||||
pipeline = Pipeline([transport.input(), stt])
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
task = PipelineTask(pipeline, observers=[TranscriptionLogObserver()])
|
||||
|
||||
runner = PipelineRunner(handle_sigint=False if sys.platform == "win32" else True)
|
||||
|
||||
|
||||
@@ -1,8 +1,4 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cu124
|
||||
torch==2.5.0+cu124
|
||||
torchvision
|
||||
torchaudio
|
||||
pipecat[whisper, openai]
|
||||
pipecat-ai[whisper, openai]
|
||||
textual==1.0.0
|
||||
pydantic-settings==2.7.1
|
||||
pyaudio==0.2.14
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
import pyaudio
|
||||
|
||||
Reference in New Issue
Block a user