From 12eb9437c12f6681a29df33bc90fc720b4612c87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 17 Oct 2024 15:34:14 -0700 Subject: [PATCH 1/2] services(stt): allow STT service to passthrough audio --- CHANGELOG.md | 3 +++ src/pipecat/services/ai_services.py | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ddc78b7e2..4dcefc96b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Added `audio_passthrough` parameter to `STTService`. If enabled, it allows + audio frames to be pushed downstream in case other processors need them. + - Added input parameter options for `PlayHTTTSService` and `PlayHTHttpTTSService`. diff --git a/src/pipecat/services/ai_services.py b/src/pipecat/services/ai_services.py index 8e11ad6ee..22ceffb7b 100644 --- a/src/pipecat/services/ai_services.py +++ b/src/pipecat/services/ai_services.py @@ -451,8 +451,9 @@ class WordTTSService(TTSService): class STTService(AIService): """STTService is a base class for speech-to-text services.""" - def __init__(self, **kwargs): + def __init__(self, audio_passthrough=False, **kwargs): super().__init__(**kwargs) + self._audio_passthrough = audio_passthrough self._settings: Dict[str, Any] = {} @abstractmethod @@ -490,8 +491,11 @@ class STTService(AIService): if isinstance(frame, AudioRawFrame): # In this service we accumulate audio internally and at the end we - # push a TextFrame. We don't really want to push audio frames down. + # push a TextFrame. If audio passthrough is enabled we also push the + # audio downstream in case someone else needs it. 
await self.process_audio_frame(frame) + if self._audio_passthrough: + await self.push_frame(frame, direction) elif isinstance(frame, STTUpdateSettingsFrame): await self._update_settings(frame.settings) else: From c9318ecd5c9ab4aee03551a9626e36f5bb637ffd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 17 Oct 2024 15:34:30 -0700 Subject: [PATCH 2/2] examples: minor fixes --- examples/foundational/07-interruptible-vad.py | 2 +- examples/foundational/07c-interruptible-deepgram.py | 1 - examples/foundational/19-openai-realtime-beta.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/foundational/07-interruptible-vad.py b/examples/foundational/07-interruptible-vad.py index 716e3ee03..9ec380bd0 100644 --- a/examples/foundational/07-interruptible-vad.py +++ b/examples/foundational/07-interruptible-vad.py @@ -9,6 +9,7 @@ import aiohttp import os import sys +from pipecat.audio.vad.silero import SileroVAD from pipecat.frames.frames import LLMMessagesFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner @@ -20,7 +21,6 @@ from pipecat.processors.aggregators.llm_response import ( from pipecat.services.cartesia import CartesiaTTSService from pipecat.services.openai import OpenAILLMService from pipecat.transports.services.daily import DailyParams, DailyTransport -from pipecat.vad.silero import SileroVAD from runner import configure diff --git a/examples/foundational/07c-interruptible-deepgram.py b/examples/foundational/07c-interruptible-deepgram.py index e913005e1..d232ad973 100644 --- a/examples/foundational/07c-interruptible-deepgram.py +++ b/examples/foundational/07c-interruptible-deepgram.py @@ -80,7 +80,6 @@ async def main(): @transport.event_handler("on_first_participant_joined") async def on_first_participant_joined(transport, participant): - transport.capture_participant_transcription(participant["id"]) # Kick off the conversation. 
messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMMessagesFrame(messages)]) diff --git a/examples/foundational/19-openai-realtime-beta.py b/examples/foundational/19-openai-realtime-beta.py index e9cb02f23..f258f95e4 100644 --- a/examples/foundational/19-openai-realtime-beta.py +++ b/examples/foundational/19-openai-realtime-beta.py @@ -15,7 +15,7 @@ from loguru import logger from runner import configure from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.vad.vad_analyzer import VADParams +from pipecat.audio.vad.vad_analyzer import VADParams from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask