Compare commits

...

2 Commits

Author SHA1 Message Date
James Hush
33fd9293c2 Update demo 2025-01-17 12:05:51 +08:00
James Hush
3038d10a1d docs: hold music demo 2025-01-16 13:43:54 +08:00

View File

@@ -8,6 +8,7 @@ import argparse
import asyncio
import os
import sys
from dataclasses import dataclass, field
import aiohttp
from dotenv import load_dotenv
@@ -16,11 +17,26 @@ from runner import configure_with_args
from pipecat.audio.mixers.soundfile_mixer import SoundfileMixer
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import MixerEnableFrame, MixerUpdateSettingsFrame
from pipecat.frames.frames import (
BotInterruptionFrame,
BotSpeakingFrame,
BotStartedSpeakingFrame,
BotStoppedSpeakingFrame,
ControlFrame,
Frame,
InputAudioRawFrame,
LLMTextFrame,
MetricsFrame,
MixerEnableFrame,
MixerUpdateSettingsFrame,
TextFrame,
TTSAudioRawFrame,
)
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
@@ -31,7 +47,70 @@ logger.remove(0)
# Send log output to stderr at DEBUG level.
logger.add(sys.stderr, level="DEBUG")
class DebugProcessor(FrameProcessor):
    """Pass-through processor that logs the frames it sees.

    A fixed set of frame types (raw input/TTS audio, bot speaking
    notifications, metrics and LLM text) is excluded from logging. Every frame,
    logged or not, is forwarded unchanged in the direction it arrived.
    """

    def __init__(self, name, **kwargs):  # noqa: D107
        # The label used as the log prefix is stored before delegating to the
        # base initializer.
        self._name = name
        super().__init__(**kwargs)

    async def process_frame(self, frame: Frame, direction: FrameDirection):  # noqa: D102
        await super().process_frame(frame, direction)

        # Frame types that are never written to the log.
        unlogged_types = (
            InputAudioRawFrame,
            TTSAudioRawFrame,
            BotSpeakingFrame,
            BotStartedSpeakingFrame,
            MetricsFrame,
            LLMTextFrame,
        )
        should_log = not isinstance(frame, unlogged_types)
        if should_log:
            logger.info(f"{self._name}: {frame} {direction}")

        # Always hand the frame on so the pipeline keeps flowing.
        await self.push_frame(frame, direction)
@dataclass
class StartHoldMusicFrame(ControlFrame):
    """Control frame requesting that hold music be started."""
@dataclass
class StopHoldMusicFrame(ControlFrame):
    """Control frame requesting that hold music be stopped."""
class HoldMusicProcessor(FrameProcessor):
    """Drive the output mixer so hold music plays while the bot is silent.

    ``StartHoldMusicFrame`` arms the processor and ``StopHoldMusicFrame``
    disarms it. While armed, a ``BotStoppedSpeakingFrame`` pushes mixer frames
    that select the "office" sound at volume 1 and enable the mixer. A
    ``BotSpeakingFrame`` pushes a frame that disables the mixer. Every incoming
    frame is forwarded unchanged in the direction it arrived.
    """

    def __init__(self, **kwargs):  # noqa: D107
        super().__init__(**kwargs)
        # True between a StartHoldMusicFrame and the next StopHoldMusicFrame.
        self._play_hold_music = False

    async def process_frame(self, frame: Frame, direction: FrameDirection):  # noqa: D102
        await super().process_frame(frame, direction)

        if isinstance(frame, StartHoldMusicFrame):
            self._play_hold_music = True
        elif isinstance(frame, StopHoldMusicFrame):
            # Fix: this branch used to test StartHoldMusicFrame a second time,
            # which cleared the flag immediately after it was set and left
            # StopHoldMusicFrame unhandled.
            self._play_hold_music = False

        if isinstance(frame, BotStoppedSpeakingFrame) and self._play_hold_music:
            # The bot went quiet while hold music is armed: configure the
            # mixer, then switch it on.
            await self.push_frame(
                MixerUpdateSettingsFrame({"volume": 1, "sound": "office", "loop": False})
            )
            await self.push_frame(MixerEnableFrame(True))
        elif isinstance(frame, BotSpeakingFrame):
            # The bot is talking: switch the mixer off.
            await self.push_frame(MixerEnableFrame(False))

        # Forward the original frame regardless of what was handled above.
        await self.push_frame(frame, direction)
async def main():
"""Main function to run the bot background sound."""
async with aiohttp.ClientSession() as session:
parser = argparse.ArgumentParser(description="Bot Background Sound")
parser.add_argument("-i", "--input", type=str, required=True, help="Input audio file")
@@ -41,7 +120,7 @@ async def main():
soundfile_mixer = SoundfileMixer(
sound_files={"office": args.input},
default_sound="office",
volume=2.0,
volume=0,
)
transport = DailyTransport(
@@ -73,12 +152,16 @@ async def main():
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
dp = DebugProcessor("post-llm")
hold_music_processor = HoldMusicProcessor()
pipeline = Pipeline(
[
transport.input(), # Transport user input
context_aggregator.user(), # User responses
llm, # LLM
dp, # Debug processor
hold_music_processor, # Hold music
tts, # TTS
transport.output(), # Transport bot output
context_aggregator.assistant(), # Assistant spoken responses
@@ -98,17 +181,18 @@ async def main():
# Handler registered for the transport's "on_first_participant_joined" event.
# NOTE(review): this span comes from a diff hunk whose +/- markers were lost, so
# removed and added lines may be interleaved below — confirm against the real file.
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
# Start capturing transcription for the participant that just joined.
await transport.capture_participant_transcription(participant["id"])
# Show how to use mixer control frames.
await asyncio.sleep(10.0)
await task.queue_frame(MixerUpdateSettingsFrame({"volume": 0.5}))
await asyncio.sleep(5.0)
await task.queue_frame(MixerEnableFrame(False))
await asyncio.sleep(5.0)
await task.queue_frame(MixerEnableFrame(True))
await asyncio.sleep(5.0)
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
# Queue an announcement, then the frame that starts hold music.
await task.queue_frame(TextFrame("I'm going to play some hold music."))
await task.queue_frame(StartHoldMusicFrame())
await asyncio.sleep(3)
# Queue the stop frame followed by two more announcements.
await task.queue_frame(StopHoldMusicFrame())
await task.queue_frame(TextFrame("I just stopped the hold music."))
# NOTE(review): the message below says 2 seconds but the sleep that follows is
# 3 seconds — confirm which one is intended.
await task.queue_frame(TextFrame("Waiting 2 seconds to play hold music again."))
await asyncio.sleep(3)
await task.queue_frame(StartHoldMusicFrame())
runner = PipelineRunner()