diff --git a/examples/foundational/09a-local-mirror.py b/examples/foundational/09a-local-mirror.py index ae82ac646..5cbde5700 100644 --- a/examples/foundational/09a-local-mirror.py +++ b/examples/foundational/09a-local-mirror.py @@ -11,6 +11,7 @@ import tkinter as tk from dotenv import load_dotenv from loguru import logger +from pipecat.examples.run import maybe_capture_participant_camera from pipecat.frames.frames import ( Frame, InputAudioRawFrame, @@ -107,6 +108,7 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si @transport.event_handler("on_client_connected") async def on_client_connected(transport, client): logger.info(f"Client connected") + await maybe_capture_participant_camera(transport, client, framerate=30) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/12-describe-video.py b/examples/foundational/12-describe-video.py index 467330765..c7edd67c4 100644 --- a/examples/foundational/12-describe-video.py +++ b/examples/foundational/12-describe-video.py @@ -12,7 +12,7 @@ from dotenv import load_dotenv from loguru import logger from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.examples.run import get_transport_client_id, maybe_capture_participant_video +from pipecat.examples.run import get_transport_client_id, maybe_capture_participant_camera from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner @@ -105,7 +105,7 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si async def on_client_connected(transport, client): logger.info(f"Client connected: {client}") - await maybe_capture_participant_video(transport, client) + await maybe_capture_participant_camera(transport, client) # Set the participant ID in the image requester client_id = get_transport_client_id(transport, client) diff --git a/examples/foundational/12a-describe-video-gemini-flash.py b/examples/foundational/12a-describe-video-gemini-flash.py index 9558e883b..fa2d9aec6 100644 --- a/examples/foundational/12a-describe-video-gemini-flash.py +++ b/examples/foundational/12a-describe-video-gemini-flash.py @@ -12,7 +12,7 @@ from dotenv import load_dotenv from loguru import logger from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.examples.run import get_transport_client_id, maybe_capture_participant_video +from pipecat.examples.run import get_transport_client_id, maybe_capture_participant_camera from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner @@ -108,7 +108,7 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si async def on_client_connected(transport, client): logger.info(f"Client connected: {client}") - await maybe_capture_participant_video(transport, client) + await maybe_capture_participant_camera(transport, client) # Set the participant ID in the image requester client_id = get_transport_client_id(transport, client) diff --git a/examples/foundational/12b-describe-video-gpt-4o.py b/examples/foundational/12b-describe-video-gpt-4o.py index 705b8926c..3c7937f54 100644 --- a/examples/foundational/12b-describe-video-gpt-4o.py +++ b/examples/foundational/12b-describe-video-gpt-4o.py @@ -12,7 +12,7 @@ from dotenv import load_dotenv from loguru import logger from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.examples.run import get_transport_client_id, maybe_capture_participant_video +from pipecat.examples.run import get_transport_client_id, maybe_capture_participant_camera from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner @@ -108,7 +108,7 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si async def on_client_connected(transport, client): logger.info(f"Client connected: {client}") - await maybe_capture_participant_video(transport, client) + await maybe_capture_participant_camera(transport, client) # Set the participant ID in the image requester client_id = get_transport_client_id(transport, client) diff --git a/examples/foundational/12c-describe-video-anthropic.py b/examples/foundational/12c-describe-video-anthropic.py index cb90faea2..0e82f1077 100644 --- a/examples/foundational/12c-describe-video-anthropic.py +++ b/examples/foundational/12c-describe-video-anthropic.py @@ -12,7 +12,7 @@ from dotenv import load_dotenv from loguru import logger from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.examples.run import get_transport_client_id, maybe_capture_participant_video +from pipecat.examples.run import get_transport_client_id, maybe_capture_participant_camera from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner @@ -108,7 +108,7 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si async def on_client_connected(transport, client): logger.info(f"Client connected: {client}") - await maybe_capture_participant_video(transport, client) + await maybe_capture_participant_camera(transport, client) # Set the participant ID in the image requester client_id = get_transport_client_id(transport, client) diff --git a/examples/foundational/14b-function-calling-anthropic-video.py b/examples/foundational/14b-function-calling-anthropic-video.py index 427bf0047..04049984f 100644 --- a/examples/foundational/14b-function-calling-anthropic-video.py +++ b/examples/foundational/14b-function-calling-anthropic-video.py @@ -14,7 +14,7 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.examples.run import get_transport_client_id, maybe_capture_participant_video +from pipecat.examples.run import get_transport_client_id, maybe_capture_participant_camera from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask @@ -181,7 +181,7 @@ If you need to use a tool, simply use the tool. Do not tell the user the tool yo async def on_client_connected(transport, client): logger.info(f"Client connected: {client}") - await maybe_capture_participant_video(transport, client) + await maybe_capture_participant_camera(transport, client) global client_id client_id = get_transport_client_id(transport, client) diff --git a/examples/foundational/14d-function-calling-video.py b/examples/foundational/14d-function-calling-video.py index 67173685a..7761e1be2 100644 --- a/examples/foundational/14d-function-calling-video.py +++ b/examples/foundational/14d-function-calling-video.py @@ -14,7 +14,7 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.examples.run import get_transport_client_id, maybe_capture_participant_video +from pipecat.examples.run import get_transport_client_id, maybe_capture_participant_camera from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineTask @@ -164,7 +164,7 @@ indicate you should use the get_image tool are: async def on_client_connected(transport, client): logger.info(f"Client connected") - await maybe_capture_participant_video(transport, client) + await maybe_capture_participant_camera(transport, client) global client_id client_id = get_transport_client_id(transport, client) diff --git a/examples/foundational/14e-function-calling-gemini.py b/examples/foundational/14e-function-calling-gemini.py index df39677b3..4e6a582f3 100644 --- a/examples/foundational/14e-function-calling-gemini.py +++ b/examples/foundational/14e-function-calling-gemini.py @@ -14,7 +14,7 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.examples.run import get_transport_client_id, maybe_capture_participant_video +from pipecat.examples.run import get_transport_client_id, maybe_capture_participant_camera from pipecat.frames.frames import TTSSpeakFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner @@ -174,7 +174,7 @@ indicate you should use the get_image tool are: async def on_client_connected(transport, client): logger.info(f"Client connected: {client}") - await maybe_capture_participant_video(transport, client) + await maybe_capture_participant_camera(transport, client) global client_id client_id = get_transport_client_id(transport, client) diff --git a/examples/foundational/20d-persistent-context-gemini.py b/examples/foundational/20d-persistent-context-gemini.py index c3bba48cf..5219e3a2e 100644 --- a/examples/foundational/20d-persistent-context-gemini.py +++ b/examples/foundational/20d-persistent-context-gemini.py @@ -15,7 +15,7 @@ from loguru import logger from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams -from pipecat.examples.run import get_transport_client_id, maybe_capture_participant_video +from pipecat.examples.run import get_transport_client_id, maybe_capture_participant_camera from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask @@ -286,7 +286,7 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si async def on_client_connected(transport, client): logger.info(f"Client connected") - await maybe_capture_participant_video(transport, client) + await maybe_capture_participant_camera(transport, client) global client_id client_id = get_transport_client_id(transport, client) diff --git a/examples/foundational/26c-gemini-multimodal-live-video.py b/examples/foundational/26c-gemini-multimodal-live-video.py index ee25bcb23..c39aeb63b 100644 --- a/examples/foundational/26c-gemini-multimodal-live-video.py +++ b/examples/foundational/26c-gemini-multimodal-live-video.py @@ -10,10 +10,10 @@ import os from dotenv import load_dotenv from loguru import logger -from run import maybe_capture_participant_video from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams +from pipecat.examples.run import maybe_capture_participant_camera, maybe_capture_participant_screen from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask @@ -92,7 +92,8 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si async def on_client_connected(transport, client): logger.info(f"Client connected: {client}") - await maybe_capture_participant_video(transport, client) + await maybe_capture_participant_camera(transport, client, framerate=1) + await maybe_capture_participant_screen(transport, client, framerate=1) await task.queue_frames([context_aggregator.user().get_context_frame()]) await asyncio.sleep(3) diff --git a/src/pipecat/examples/run.py b/src/pipecat/examples/run.py index 4d215e8b1..d999a5b09 100644 --- a/src/pipecat/examples/run.py +++ b/src/pipecat/examples/run.py @@ -43,11 +43,21 @@ def get_transport_client_id(transport: BaseTransport, client: Any) -> str: return "" -async def maybe_capture_participant_video(transport: BaseTransport, client: Any): +async def maybe_capture_participant_camera( + transport: BaseTransport, client: Any, framerate: int = 0 +): if isinstance(transport, DailyTransport): - await transport.capture_participant_video(client["id"], framerate=0, video_source="camera") await transport.capture_participant_video( - client["id"], framerate=0, video_source="screenVideo" + client["id"], framerate=framerate, video_source="camera" + ) + + +async def maybe_capture_participant_screen( + transport: BaseTransport, client: Any, framerate: int = 0 +): + if isinstance(transport, DailyTransport): + await transport.capture_participant_video( + client["id"], framerate=framerate, video_source="screenVideo" )