Files
pipecat/examples/foundational/05a-local-sync-speech-and-image.py

175 lines
5.9 KiB
Python

#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import asyncio
import os
import sys
import tkinter as tk
from pipecat.frames.frames import AudioRawFrame, Frame, URLImageRawFrame, LLMMessagesFrame, TextFrame
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.processors.aggregators.llm_response import LLMFullResponseAggregator
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.services.openai import OpenAILLMService
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.services.fal import FalImageGenService
from pipecat.transports.base_transport import TransportParams
from pipecat.transports.local.tk import TkLocalTransport
from loguru import logger
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main():
async with aiohttp.ClientSession() as session:
tk_root = tk.Tk()
tk_root.title("Calendar")
runner = PipelineRunner()
async def get_month_data(month):
messages = [{"role": "system", "content": f"Describe a nature photograph suitable for use in a calendar, for the month of {month}. Include only the image description with no preamble. Limit the description to one sentence, please.", }]
class ImageDescription(FrameProcessor):
def __init__(self):
super().__init__()
self.text = ""
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)
if isinstance(frame, TextFrame):
self.text = frame.text
await self.push_frame(frame, direction)
class AudioGrabber(FrameProcessor):
def __init__(self):
super().__init__()
self.audio = bytearray()
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)
if isinstance(frame, AudioRawFrame):
self.audio.extend(frame.audio)
self.frame = AudioRawFrame(
bytes(self.audio), frame.sample_rate, frame.num_channels)
class ImageGrabber(FrameProcessor):
def __init__(self):
super().__init__()
self.frame = None
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)
if isinstance(frame, URLImageRawFrame):
self.frame = frame
llm = OpenAILLMService(
api_key=os.getenv("OPENAI_API_KEY"),
model="gpt-4o")
tts = ElevenLabsTTSService(
aiohttp_session=session,
api_key=os.getenv("ELEVENLABS_API_KEY"),
voice_id=os.getenv("ELEVENLABS_VOICE_ID"))
imagegen = FalImageGenService(
params=FalImageGenService.InputParams(
image_size="square_hd"
),
aiohttp_session=session,
key=os.getenv("FAL_KEY"))
aggregator = LLMFullResponseAggregator()
description = ImageDescription()
audio_grabber = AudioGrabber()
image_grabber = ImageGrabber()
pipeline = Pipeline([
llm,
aggregator,
description,
ParallelPipeline([tts, audio_grabber],
[imagegen, image_grabber])
])
task = PipelineTask(pipeline)
await task.queue_frame(LLMMessagesFrame(messages))
await task.stop_when_done()
await runner.run(task)
return {
"month": month,
"text": description.text,
"image": image_grabber.frame,
"audio": audio_grabber.frame,
}
transport = TkLocalTransport(
tk_root,
TransportParams(
audio_out_enabled=True,
camera_out_enabled=True,
camera_out_width=1024,
camera_out_height=1024))
pipeline = Pipeline([transport.output()])
task = PipelineTask(pipeline)
# We only specify 5 months as we create tasks all at once and we might
# get rate limited otherwise.
months: list[str] = [
"January",
"February",
# "March",
# "April",
# "May",
]
# We create one task per month. This will be executed concurrently.
month_tasks = [asyncio.create_task(get_month_data(month)) for month in months]
# Now we wait for each month task in the order they're completed. The
# benefit is we'll have as little delay as possible before the first
# month, and likely no delay between months, but the months won't
# display in order.
async def show_images(month_tasks):
for month_data_task in asyncio.as_completed(month_tasks):
data = await month_data_task
await task.queue_frames([data["image"], data["audio"]])
await runner.stop_when_done()
async def run_tk():
while not task.has_finished():
tk_root.update()
tk_root.update_idletasks()
await asyncio.sleep(0.1)
await asyncio.gather(runner.run(task), show_images(month_tasks), run_tk())
if __name__ == "__main__":
asyncio.run(main())