Replaces every "task" identifier that referred to the BaseTask abstraction with "worker". Asyncio task plumbing (asyncio.Task, BaseTaskManager, TaskManager, create_task, cancel_task, etc.) stays untouched. Highlights: - Classes: BaseTask → BaseWorker, PipelineTask → PipelineWorker, LLMTask → LLMWorker, LLMContextTask → LLMContextWorker, TaskBus → WorkerBus, TaskRegistry → WorkerRegistry, TaskActivationArgs → WorkerActivationArgs, TaskReadyData → WorkerReadyData, TaskRegistryEntry → WorkerRegistryEntry, TaskObserver → WorkerObserver, all Bus*TaskMessage → Bus*WorkerMessage, BusAddTaskMessage.task field → worker, BusWorkerRegistryMessage.tasks field → workers. - Methods/decorators: activate_task → activate_worker, deactivate_task → deactivate_worker, add_task → add_worker, watch_task → watch_worker, @task_ready → @worker_ready, setup_pipeline_task hook → setup_pipeline_worker. - Params/fields: FrameProcessorSetup.pipeline_task and FunctionCallParams.pipeline_task → pipeline_worker. Parameter names like task_name → worker_name; spawn/run accept worker:. - Files: pipeline/base_task.py → base_worker.py, pipeline/task.py → worker.py (plus a re-export shim at pipeline/task.py), task_observer.py → worker_observer.py, task_ready_decorator.py → worker_ready_decorator.py, pipecat.tasks → pipecat.workers, llm_task.py → llm_worker.py, llm_context_task.py → llm_context_worker.py, examples/multi-task → examples/multi-worker. Back-compat: - PipelineTask kept as a deprecated subclass of PipelineWorker that warns on construction. - pipecat.pipeline.task re-exports PipelineWorker/PipelineTask/etc. so existing user imports keep working. - FrameProcessor.pipeline_task kept as a deprecated property that forwards to pipeline_worker. Local variables in examples that hold a worker (task = PipelineTask(...)) are renamed to worker = PipelineWorker(...). Asyncio-task locals (runner_task, etc.) are preserved.
173 lines
5.8 KiB
Python
173 lines
5.8 KiB
Python
#
|
|
# Copyright (c) 2024-2026, Daily
|
|
#
|
|
# SPDX-License-Identifier: BSD 2-Clause License
|
|
#
|
|
import os
|
|
|
|
import cv2
|
|
import numpy as np
|
|
from dotenv import load_dotenv
|
|
from loguru import logger
|
|
|
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
|
from pipecat.frames.frames import Frame, InputImageRawFrame, LLMRunFrame, OutputImageRawFrame
|
|
from pipecat.pipeline.pipeline import Pipeline
|
|
from pipecat.pipeline.runner import PipelineRunner
|
|
from pipecat.pipeline.worker import PipelineParams, PipelineWorker
|
|
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
from pipecat.processors.aggregators.llm_response_universal import (
|
|
LLMContextAggregatorPair,
|
|
LLMUserAggregatorParams,
|
|
)
|
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
|
from pipecat.runner.types import RunnerArguments
|
|
from pipecat.runner.utils import create_transport
|
|
from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
|
|
from pipecat.transports.base_transport import TransportParams
|
|
from pipecat.transports.daily.transport import DailyParams, DailyTransport
|
|
|
|
# Load settings from a local .env file into the process environment at import
# time, so values such as GOOGLE_API_KEY are available when the bot starts.
# override=True lets values from .env replace variables that are already set.
load_dotenv(override=True)
|
|
|
|
# Media options applied identically to every supported transport: audio in and
# out, plus video in and live video out.
_SHARED_MEDIA_OPTIONS = {
    "audio_in_enabled": True,
    "audio_out_enabled": True,
    "audio_out_10ms_chunks": 2,
    "video_in_enabled": True,
    "video_out_enabled": True,
    "video_out_is_live": True,
}

# Maps a transport name to a zero-argument factory that builds its params
# object. Wrapping construction in a lambda defers it until the factory is
# actually called; the mapping is handed to create_transport() in bot().
transport_params = {
    "daily": lambda: DailyParams(**_SHARED_MEDIA_OPTIONS),
    "webrtc": lambda: TransportParams(**_SHARED_MEDIA_OPTIONS),
}
|
|
|
|
|
|
class EdgeDetectionProcessor(FrameProcessor):
    """Replace the user's camera video with a Canny edge-detected version.

    Frames that are ``InputImageRawFrame`` instances whose ``transport_source``
    is ``"camera"`` are consumed and re-emitted as ``OutputImageRawFrame``
    objects holding the edge map, resized to the configured output size when
    the incoming size differs. Every other frame is forwarded unchanged in the
    direction it was travelling.
    """

    def __init__(self, video_out_width: int, video_out_height: int):
        """Store the dimensions of the video frames to emit.

        Args:
            video_out_width: Width of the emitted frames.
            video_out_height: Height of the emitted frames.
        """
        super().__init__()
        self._video_out_width = video_out_width
        self._video_out_height = video_out_height

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Apply edge detection to camera frames and pass everything else on.

        Args:
            frame: The frame to process.
            direction: The direction the frame is travelling in the pipeline.
        """
        await super().process_frame(frame, direction)

        is_camera_image = (
            isinstance(frame, InputImageRawFrame) and frame.transport_source == "camera"
        )
        if not is_camera_image:
            # Not a camera image: forward it untouched.
            await self.push_frame(frame, direction)
            return

        # View the raw bytes as a (height, width, 3) uint8 array; frame.size is
        # ordered (width, height), hence the swapped indices.
        pixels = np.frombuffer(frame.image, dtype=np.uint8).reshape(
            (frame.size[1], frame.size[0], 3)
        )

        # Canny produces a single-channel edge map; convert it back to three
        # channels so the byte layout matches a colour frame again.
        edges = cv2.cvtColor(cv2.Canny(pixels, 100, 200), cv2.COLOR_GRAY2BGR)

        # Resize only when the camera frame does not already match the
        # configured output size.
        target_size = (self._video_out_width, self._video_out_height)
        if frame.size != target_size:
            edges = cv2.resize(edges, target_size)

        # The original camera frame is not forwarded; only the edge-detected
        # output frame is pushed (in the default direction).
        await self.push_frame(
            OutputImageRawFrame(image=edges.tobytes(), size=target_size, format=frame.format)
        )
|
|
|
|
|
|
# System prompt passed to the Gemini Live service as `system_instruction`.
# A plain string is used (not an f-string) because the text contains no
# placeholders to interpolate.
# NOTE(review): the prompt opens with an unmatched double quote. It is kept
# as-is so the text sent to the model is unchanged — confirm whether it is
# intended and remove it if not.
SYSTEM_INSTRUCTION = """
"You are Gemini Chatbot, a friendly, helpful robot.

Your goal is to demonstrate your capabilities in a succinct way.

Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points.

Respond to what the user said in a creative and helpful way. Keep your responses brief. One or two sentences at most.
"""
|
|
|
|
|
|
async def run_bot(pipecat_transport):
    """Build the Gemini Live pipeline on the given transport and run it.

    The pipeline reads from the transport, aggregates user context, runs the
    Gemini Live LLM, sends edge-detected camera video back through the
    transport output, and aggregates the assistant context. The function
    returns once the runner has finished running the pipeline worker.

    Args:
        pipecat_transport: The transport that supplies input and receives
            output; its ``_params`` provide the output video dimensions.

    Raises:
        KeyError: If the ``GOOGLE_API_KEY`` environment variable is not set.
    """
    gemini = GeminiLiveLLMService(
        api_key=os.environ["GOOGLE_API_KEY"],
        voice_id="Puck",  # Aoede, Charon, Fenrir, Kore, Puck
        transcribe_user_audio=True,
        system_instruction=SYSTEM_INSTRUCTION,
    )

    # Shared conversation context plus the user/assistant aggregators around it.
    context = LLMContext()
    user_side_params = LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer())
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context, user_params=user_side_params
    )

    processors = [
        pipecat_transport.input(),
        user_aggregator,
        gemini,  # LLM
        # Sending the video back to the user
        EdgeDetectionProcessor(
            pipecat_transport._params.video_out_width,
            pipecat_transport._params.video_out_height,
        ),
        pipecat_transport.output(),
        assistant_aggregator,
    ]
    pipeline = Pipeline(processors)

    worker_params = PipelineParams(enable_metrics=True, enable_usage_metrics=True)
    worker = PipelineWorker(pipeline, params=worker_params)

    @worker.rtvi.event_handler("on_client_ready")
    async def on_client_ready(rtvi):
        logger.info("Pipecat client ready.")
        # Kick off the conversation.
        opening_prompt = {
            "role": "developer",
            "content": "Start by greeting the user warmly and introducing yourself.",
        }
        context.add_message(opening_prompt)
        await worker.queue_frames([LLMRunFrame()])

    @pipecat_transport.event_handler("on_client_connected")
    async def on_client_connected(transport, participant):
        logger.info("Pipecat Client connected")
        if not isinstance(transport, DailyTransport):
            # Other transports are asked for the "camera" source directly.
            await pipecat_transport.capture_participant_video("camera")
            return
        # Daily captures video per participant id, at 30 frames per second.
        await pipecat_transport.capture_participant_video(participant["id"], framerate=30)

    @pipecat_transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Pipecat Client disconnected")
        # Cancel the worker when the client disconnects.
        await worker.cancel()

    await PipelineRunner(handle_sigint=False, force_gc=True).run(worker)
|
|
|
|
|
|
async def bot(runner_args: RunnerArguments):
    """Bot entry point compatible with Pipecat Cloud.

    Creates a transport from the runner arguments and the module-level
    ``transport_params`` mapping, then runs the bot on it.

    Args:
        runner_args: Arguments supplied by the runner, forwarded to
            ``create_transport``.
    """
    await run_bot(await create_transport(runner_args, transport_params))
|
|
|
|
|
|
if __name__ == "__main__":
    # Imported here so the runner entry point is only loaded when this file is
    # executed as a script, not when it is imported as a module.
    from pipecat.runner.run import main as run_main

    run_main()
|