Merge pull request #610 from pipecat-ai/aleix/stt-push-audio
allow STT services to pass through audio frames
This commit is contained in:
@@ -9,6 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
### Added
|
||||
|
||||
- Added `audio_passthrough` parameter to `STTService`. If enabled, it allows
|
||||
audio frames to be pushed downstream in case other processors need them.
|
||||
|
||||
- Added input parameter options for `PlayHTTTSService` and
|
||||
`PlayHTHttpTTSService`.
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ import aiohttp
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVAD
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -20,7 +21,6 @@ from pipecat.processors.aggregators.llm_response import (
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVAD
|
||||
|
||||
from runner import configure
|
||||
|
||||
|
||||
@@ -80,7 +80,6 @@ async def main():
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
|
||||
@@ -15,7 +15,7 @@ from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.vad.vad_analyzer import VADParams
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
|
||||
@@ -451,8 +451,9 @@ class WordTTSService(TTSService):
|
||||
class STTService(AIService):
|
||||
"""STTService is a base class for speech-to-text services."""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
def __init__(self, audio_passthrough=False, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self._audio_passthrough = audio_passthrough
|
||||
self._settings: Dict[str, Any] = {}
|
||||
|
||||
@abstractmethod
|
||||
@@ -490,8 +491,11 @@ class STTService(AIService):
|
||||
|
||||
if isinstance(frame, AudioRawFrame):
|
||||
# In this service we accumulate audio internally and at the end we
|
||||
# push a TextFrame. We don't really want to push audio frames down.
|
||||
# push a TextFrame. If `audio_passthrough` is enabled we also push the audio downstream in case someone
|
||||
# else needs it.
|
||||
await self.process_audio_frame(frame)
|
||||
if self._audio_passthrough:
|
||||
await self.push_frame(frame, direction)
|
||||
elif isinstance(frame, STTUpdateSettingsFrame):
|
||||
await self._update_settings(frame.settings)
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user