added fuzz example

wip
wip: telestrator
2024-03-22 14:20:16 +00:00 · 2024-03-19 22:04:47 +00:00 · 2024-03-19 15:31:19 +00:00 · 2024-03-19 03:08:04 +00:00 · 2024-03-19 01:51:36 +00:00 · 2024-03-18 22:14:02 +00:00
17 changed files with 1287 additions and 18 deletions
--- a/src/dailyai/pipeline/aggregators.py
+++ b/src/dailyai/pipeline/aggregators.py
@@ -252,9 +252,15 @@ class LLMFullResponseAggregator(FrameProcessor):
        self.aggregation = ""
    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        if not isinstance(frame, AudioFrame):
            print(f"^^^ LFRA got frame: {frame}")
        if isinstance(frame, TextFrame):
            self.aggregation += frame.text
            print(
                f"^^^ LFRA got textframe. aggregation is now {self.aggregation}")
        elif isinstance(frame, LLMResponseEndFrame):
            print(
                f"^^^ LFRA got an llmresponseendframe. About to yield aggregation: {self.aggregation}")
            yield TextFrame(self.aggregation)
            yield frame
            self.aggregation = ""
--- a/src/dailyai/pipeline/frames.py
+++ b/src/dailyai/pipeline/frames.py
@@ -179,3 +179,33 @@ class LLMFunctionCallFrame(Frame):
    """Emitted when the LLM has received an entire function call completion."""
    function_name: str
    arguments: str
@dataclass()
 class VideoImageFrame(Frame):
    """Contains a still image from a participant's video stream."""
    # Id of the participant the image came from.
    participantId: str
    # The image itself.
    # NOTE(review): annotated as bytes, but DailyTransportService._handle_video_frame
    # stores the renderer's video_frame object here unchanged, and
    # OpenAIVisionService.run_vision checks for a daily VideoFrame - confirm the type.
    image: bytes
    # def __str__(self):
    #     return f"{self.__class__.__name__}, participantId: {self.participantId}, image size: {len(self.image)} B"
 class TelestratorImageFrame(ImageFrame):
    """An ImageFrame that the transport also puts back on its receive queue.

    BaseTransportService sets the frame's image like it does for any
    ImageFrame and additionally puts the frame itself on receive_queue.
    """
    pass
@dataclass()
 class VisionFrame(Frame):
    """Pairs a text prompt with an image for a VisionService to process."""
    # Text handed to VisionService.run_vision as the prompt.
    prompt: str
    # Image handed to VisionService.run_vision alongside the prompt.
    image: bytes
    # def __str__(self):
    #     return f"{self.__class__.__name__}, prompt: {self.prompt}, image size: {len(self.image)} B"
@dataclass()
 class RequestVideoImageFrame(Frame):
    """Send to the transport to request a new video image from a specific participant. Leave participantId
    empty to request a frame from all participants."""
    participantId: str | None
--- a/src/dailyai/services/ai_services.py
+++ b/src/dailyai/services/ai_services.py
@@ -18,6 +18,7 @@ from dailyai.pipeline.frames import (
    Frame,
    TextFrame,
    TranscriptionQueueFrame,
    VisionFrame
 )
 from abc import abstractmethod
@@ -61,6 +62,7 @@ class TTSService(AIService):
                yield TextFrame(self.current_sentence)
        if not isinstance(frame, TextFrame):
            print(f"*** tts yielding non-text: {frame}")
            yield frame
            return
@@ -79,6 +81,7 @@ class TTSService(AIService):
            # note we pass along the text frame *after* the audio, so the text
            # frame is completed after the audio is processed.
            print(f"*** tts yielding text: {text}")
            yield TextFrame(text)
@@ -133,14 +136,36 @@ class STTService(AIService):
        yield TranscriptionQueueFrame(text, "", str(time.time()))
 class VisionService(AIService):
    """Base class for services that answer a prompt about an image.

    Subclasses implement run_vision(); process_frame() feeds it every
    VisionFrame and passes all other frames through untouched.
    """

    def __init__(self):
        super().__init__()

    # TODO-CB: return type
    @abstractmethod
    async def run_vision(self, prompt: str, image: bytes):
        """Produce the frames that answer `prompt` for `image`.

        process_frame() consumes the result with `async for`, so
        implementations are expected to be async generators.
        """
        pass

    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        """Run vision on VisionFrames; forward anything else unchanged.

        For a VisionFrame, every frame produced by run_vision() is yielded,
        followed by a single LLMResponseEndFrame.
        """
        if not isinstance(frame, VisionFrame):
            yield frame
            return
        async for result in self.run_vision(frame.prompt, frame.image):
            print(
                f"&&& visionservce processframe got frame to yield: {result}")
            yield result
        # Marks the end of this response for downstream processors.
        yield LLMResponseEndFrame()
 class FrameLogger(AIService):
    def __init__(self, prefix="Frame", **kwargs):
        super().__init__(**kwargs)
        self.prefix = prefix
    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
-        if isinstance(frame, (AudioFrame, ImageFrame)):
+        if isinstance(frame, (AudioFrame)):
-            self.logger.info(f"{self.prefix}: {type(frame)}")
+            # self.logger.info(f"{self.prefix}: {type(frame)}")
            pass
        else:
            print(f"{self.prefix}: {frame}")
--- a/src/dailyai/services/base_transport_service.py
+++ b/src/dailyai/services/base_transport_service.py
@@ -24,6 +24,8 @@ from dailyai.pipeline.frames import (
    TextFrame,
    UserStartedSpeakingFrame,
    UserStoppedSpeakingFrame,
    RequestVideoImageFrame,
    TelestratorImageFrame
 )
 from dailyai.pipeline.pipeline import Pipeline
 from dailyai.services.ai_services import TTSService
@@ -90,7 +92,9 @@ class BaseTransportService:
        self._vad_stop_s = kwargs.get("vad_stop_s") or 0.8
        self._context = kwargs.get("context") or []
        self._vad_enabled = kwargs.get("vad_enabled") or False
-
+        self._receive_video = kwargs.get("receive_video") or False
        self._receive_video_fps = kwargs.get("receive_video_fps") or 0.0
        self._participant_frame_times = {}
        if self._vad_enabled and self._speaker_enabled:
            raise Exception(
                "Sorry, you can't use speaker_enabled and vad_enabled at the same time. Please set one to False."
@@ -441,6 +445,7 @@ class BaseTransportService:
                    # discard them
                    if not self._is_interrupted.is_set():
                        if frame:
                            if isinstance(frame, AudioFrame):
                                chunk = frame.data
@@ -452,6 +457,12 @@ class BaseTransportService:
                                    self.write_frame_to_mic(
                                        bytes(b[:truncated_length]))
                                    b = b[truncated_length:]
                            elif isinstance(frame, TelestratorImageFrame):
                                self._set_image(frame.image)
                                asyncio.run_coroutine_threadsafe(
                                    self.receive_queue.put(frame),
                                    self._loop,
                                )
                            elif isinstance(frame, ImageFrame):
                                self._set_image(frame.image)
                            elif isinstance(frame, SpriteFrame):
@@ -459,6 +470,15 @@ class BaseTransportService:
                            elif isinstance(frame, SendAppMessageFrame):
                                self.send_app_message(
                                    frame.message, frame.participantId)
                            elif isinstance(frame, RequestVideoImageFrame):
                                # removing one or all participant IDs from _participant_frame_times
                                # will cause the transport to send the next available frame from
                                # that participant
                                if frame.participantId:
                                    self._participant_frame_times.pop(
                                        frame.participantId, None)
                                else:
                                    self._participant_frame_times.clear()
                        elif len(b):
                            self.write_frame_to_mic(bytes(b))
                            b = bytearray()
--- a/src/dailyai/services/daily_transport_service.py
+++ b/src/dailyai/services/daily_transport_service.py
@@ -2,6 +2,7 @@ import asyncio
 import inspect
 import logging
 import signal
 import time
 import threading
 import types
@@ -11,6 +12,8 @@ from typing import Any
 from dailyai.pipeline.frames import (
    ReceivedAppMessageFrame,
    TranscriptionQueueFrame,
    VideoImageFrame,
    TelestratorImageFrame
 )
 from threading import Event
@@ -204,11 +207,12 @@ class DailyTransportService(BaseTransportService, EventHandler):
        )
        self._my_participant_id = self.client.participants()["local"]["id"]
-        self.client.update_subscription_profiles({
+        if not self._receive_video:
-            "base": {
+            self.client.update_subscription_profiles({
-                "camera": "unsubscribed",
+                "base": {
-            }
+                    "camera": "unsubscribed",
-        })
+                }
            })
        if self._token and self._start_transcription:
            self.client.start_transcription(self.transcription_settings)
@@ -225,6 +229,31 @@ class DailyTransportService(BaseTransportService, EventHandler):
        self.client.leave()
        self.client.release()
    def _handle_video_frame(self, participant_id, video_frame):
        """If receive_video is true, this function is called once for each frame from each participant. We
         don't need to send every frame to the pipeline, so there are two ways to decide how to send frames:
         1. Set a greater-than-zero value for receive_video_fps. The transport will track the last send time
            for each participant and send a new frame when the requested frame rate has elapsed. This
            guarantees an image every second, for example.
         2. Set receive_video_fps less than or equal to zero to disable timed frame sending. Then, put a
            RequestVideoImageFrame in the pipeline to get a new frame for one or all participants. By
            sending a RequestVideoImageFrame immediately after successfully processing an image, you can
            ensure you don't end up queueing up frames faster than you can process them.
            """
        send_frame = False
        if not participant_id in self._participant_frame_times:
            # then it's a new participant; send the first frame
            send_frame = True
        elif self._receive_video_fps > 0 and time.time() > self._participant_frame_times[participant_id] + 1.0/self._receive_video_fps:
            # Then it's an existing participant who is due to send a new frame
            send_frame = True
        if send_frame:
            self._participant_frame_times[participant_id] = time.time()
            future = asyncio.run_coroutine_threadsafe(
                self.receive_queue.put(
                    VideoImageFrame(participant_id, video_frame)), self._loop)
    def on_first_other_participant_joined(self):
        pass
@@ -248,6 +277,9 @@ class DailyTransportService(BaseTransportService, EventHandler):
        if not self._other_participant_has_joined and participant["id"] != self._my_participant_id:
            self._other_participant_has_joined = True
            self.on_first_other_participant_joined()
        if self._receive_video:
            self.client.set_video_renderer(
                participant["id"], self._handle_video_frame)
    def on_participant_left(self, participant, reason):
        if len(self.client.participants()) < self._min_others_count + 1:
--- a/src/dailyai/services/elevenlabs_ai_service.py
+++ b/src/dailyai/services/elevenlabs_ai_service.py
@@ -15,18 +15,19 @@ class ElevenLabsTTSService(TTSService):
        *,
        aiohttp_session: aiohttp.ClientSession,
        api_key,
-        voice_id,
+        narrator,
        model="eleven_turbo_v2",
        aggregate_sentences=True
    ):
-        super().__init__()
+        super().__init__(aggregate_sentences)
        self._api_key = api_key
-        self._voice_id = voice_id
+        self._narrator = narrator
        self._aiohttp_session = aiohttp_session
        self._model = model
    async def run_tts(self, sentence) -> AsyncGenerator[bytes, None]:
-        url = f"https://api.elevenlabs.io/v1/text-to-speech/{self._voice_id}/stream"
+        url = f"https://api.elevenlabs.io/v1/text-to-speech/{self._narrator['narrator']['voice_id']}/stream"
        payload = {"text": sentence, "model_id": self._model}
        querystring = {
            "output_format": "pcm_16000",
@@ -35,6 +36,7 @@ class ElevenLabsTTSService(TTSService):
            "xi-api-key": self._api_key,
            "Content-Type": "application/json",
        }
        async with self._aiohttp_session.post(
            url, json=payload, headers=headers, params=querystring
        ) as r:
--- a/src/dailyai/services/fal_ai_services.py
+++ b/src/dailyai/services/fal_ai_services.py
@@ -53,4 +53,7 @@ class FalImageGenService(ImageGenService):
        async with self._aiohttp_session.get(image_url) as response:
            image_stream = io.BytesIO(await response.content.read())
            image = Image.open(image_stream)
-            return (image_url, image.tobytes())
+            image_bytes = image.tobytes()
            print(f"!!! fal image tobytes is:")
            print(image)
            return (image_url, image_bytes)
--- a/src/dailyai/services/open_ai_services.py
+++ b/src/dailyai/services/open_ai_services.py
@@ -2,13 +2,22 @@ import aiohttp
 from PIL import Image
 import io
 import time
-from openai import AsyncOpenAI
+import base64
 from openai import AsyncOpenAI, AsyncStream
 import json
 from collections.abc import AsyncGenerator
-from dailyai.services.ai_services import LLMService, ImageGenService
+from openai.types.chat import (
    ChatCompletion,
    ChatCompletionChunk,
    ChatCompletionMessageParam,
 )
 from daily import VideoFrame
 from dailyai.services.ai_services import LLMService, ImageGenService, VisionService
 from dailyai.services.openai_api_llm_service import BaseOpenAILLMService
 from dailyai.pipeline.frames import TextFrame
 class OpenAILLMService(BaseOpenAILLMService):
@@ -50,3 +59,67 @@ class OpenAIImageGenService(ImageGenService):
            image_stream = io.BytesIO(await response.content.read())
            image = Image.open(image_stream)
            return (image_url, image.tobytes())
 class OpenAIVisionService(VisionService):
    """Vision service that asks an OpenAI chat model to respond to an image.

    run_vision() JPEG-encodes the image, sends it together with the prompt as
    a single user message, and yields the streamed reply as TextFrames.
    """

    def __init__(
        self,
        *,
        model="gpt-4-vision-preview",
        api_key,
        raw_image_size=(1024, 1024),
    ):
        """Create the service and its OpenAI client.

        Args:
            model: name of the OpenAI model to query.
            api_key: API key used to create the async OpenAI client.
            raw_image_size: (width, height) assumed for images that arrive as
                raw RGB bytes rather than as a daily VideoFrame.
        """
        # Run the base-class initializers so any state they set up exists
        # on this instance as well.
        super().__init__()
        self._model = model
        self._client = AsyncOpenAI(api_key=api_key)
        self._raw_image_size = raw_image_size

    def _encode_jpeg_data_url(self, image):
        """Return `image` as a base64-encoded JPEG data URL."""
        if isinstance(image, VideoFrame):
            # Then it's from a daily video frame.
            print("### processing daily video frame for recognition")
            # NOTE(review): the buffer is decoded as RGBA without consulting
            # the frame's color_format - confirm that is always the case.
            rgb_image = Image.frombytes(
                'RGBA', (image.width, image.height), image.buffer
            ).convert('RGB')
        else:
            # Handle it as a raw RGB byte stream from image gen.
            rgb_image = Image.frombytes('RGB', self._raw_image_size, image)
        jpeg_buffer = io.BytesIO()
        rgb_image.save(jpeg_buffer, format='JPEG')
        base64_image = base64.b64encode(jpeg_buffer.getvalue()).decode('utf-8')
        return f"data:image/jpeg;base64,{base64_image}"

    async def run_vision(self, prompt: str, image: bytes):
        """Stream the model's answer to `prompt` about `image`.

        Args:
            prompt: text sent alongside the image.
            image: a daily VideoFrame, or raw RGB bytes of raw_image_size.

        Yields:
            A TextFrame for every streamed chunk that carries content.
        """
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": self._encode_jpeg_data_url(image)
                        },
                    },
                ],
            }
        ]
        chunks: AsyncStream[ChatCompletionChunk] = (
            await self._client.chat.completions.create(
                model=self._model,
                stream=True,
                messages=messages,
            )
        )
        async for chunk in chunks:
            print(f"%%% chunk: {chunk}")
            # Chunks without choices carry no text; skip them.
            if not chunk.choices:
                continue
            text = chunk.choices[0].delta.content
            if text:
                yield TextFrame(text)
--- a/src/examples/foundational/12-describe-video.py
+++ b/src/examples/foundational/12-describe-video.py
@@ -0,0 +1,97 @@
 import asyncio
 import aiohttp
 import logging
 import os
 from typing import AsyncGenerator
 from dailyai.pipeline.frames import Frame, LLMMessagesQueueFrame, RequestVideoImageFrame, LLMResponseEndFrame
 from dailyai.pipeline.pipeline import Pipeline
 from dailyai.pipeline.frame_processor import FrameProcessor
 from dailyai.services.daily_transport_service import DailyTransportService
 from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
 from dailyai.services.open_ai_services import OpenAILLMService, OpenAIVisionService
 from dailyai.services.deepgram_ai_services import DeepgramTTSService
 from dailyai.services.ai_services import FrameLogger
 from dailyai.pipeline.aggregators import (
    LLMAssistantContextAggregator,
    LLMUserContextAggregator,
 )
 from dailyai.pipeline.frames import VideoImageFrame, VisionFrame
 from examples.support.runner import configure
 # Plain string: the format only holds logging's own %-placeholders, so
 # there is nothing for an f-string to interpolate.
 logging.basicConfig(format="%(levelno)s %(asctime)s %(message)s")
 logger = logging.getLogger("dailyai")
 logger.setLevel(logging.DEBUG)
 class VideoImageFrameProcessor(FrameProcessor):
    """Turns each VideoImageFrame into a VisionFrame asking for a description."""

    def __init__(self):
        # No state of its own; note the base-class initializer is not called.
        pass

    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        """Yield a VisionFrame for video images; pass other frames through."""
        if not isinstance(frame, VideoImageFrame):
            yield frame
            return
        yield VisionFrame("Describe the image in one sentence.", frame.image)
 class ImageRefresher(FrameProcessor):
    """Requests a new video image each time an LLM response ends."""

    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        """Forward every frame, preceded by an image request on response end."""
        if isinstance(frame, LLMResponseEndFrame):
            # participantId=None asks for a frame from all participants.
            yield RequestVideoImageFrame(participantId=None)
        yield frame
 async def main(room_url: str, token):
    """Run the describe-video bot in a Daily room.

    Video images received from the transport are turned into VisionFrames
    asking for a one-sentence description and passed through the vision, LLM
    and TTS services; ImageRefresher then requests the next image.

    Args:
        room_url: URL of the Daily room to join.
        token: token passed to the transport along with the room URL.
    """
    async with aiohttp.ClientSession() as session:
        transport = DailyTransportService(
            room_url,
            token,
            "Respond bot",
            duration_minutes=5,
            start_transcription=True,
            mic_enabled=True,
            mic_sample_rate=16000,
            camera_enabled=False,
            vad_enabled=True,
            receive_video=True,
            # 0 disables timed frame sending; frames are requested explicitly
            # with RequestVideoImageFrame instead.
            receive_video_fps=0
        )
        tts = ElevenLabsTTSService(
            aiohttp_session=session,
            api_key=os.getenv("ELEVENLABS_API_KEY"),
            # ElevenLabsTTSService takes a keyword-only `narrator` and reads
            # narrator["narrator"]["voice_id"] when building its request URL;
            # it has no `voice_id` parameter.
            narrator={
                "narrator": {"voice_id": os.getenv("ELEVENLABS_VOICE_ID")}
            },
        )
        llm = OpenAILLMService(
            api_key=os.getenv("OPENAI_CHATGPT_API_KEY"),
            model="gpt-4-turbo-preview")
        vs = OpenAIVisionService(api_key=os.getenv("OPENAI_CHATGPT_API_KEY"))
        vifp = VideoImageFrameProcessor()
        ir = ImageRefresher()
        pipeline = Pipeline(
            processors=[
                vifp,
                vs,
                llm,
                tts,
                ir,
            ],
        )

        @transport.event_handler("on_first_other_participant_joined")
        async def on_first_other_participant_joined(transport):
            # Kick things off by asking for a first image from everyone.
            await pipeline.queue_frames([RequestVideoImageFrame(participantId=None)])

        transport.transcription_settings["extra"]["endpointing"] = True
        transport.transcription_settings["extra"]["punctuate"] = True
        await transport.run(pipeline)
 if __name__ == "__main__":
    # configure() supplies the room URL and token for this run.
    url, token = configure()
    asyncio.run(main(url, token))
--- a/src/examples/starter-apps/chatbot.py
+++ b/src/examples/starter-apps/chatbot.py
@@ -124,7 +124,6 @@ async def main(room_url: str, token):
        @transport.event_handler("on_first_other_participant_joined")
        async def on_first_other_participant_joined(transport):
            print(f"!!! in here, pipeline.source is {pipeline.source}")
            await pipeline.queue_frames([LLMMessagesQueueFrame(messages)])
        async def run_conversation():
--- a/src/examples/starter-apps/telestrator/describer.py
+++ b/src/examples/starter-apps/telestrator/describer.py
@@ -0,0 +1,100 @@
 import asyncio
 import aiohttp
 import logging
 import os
 from typing import AsyncGenerator
 from dailyai.pipeline.frames import Frame, LLMMessagesQueueFrame, RequestVideoImageFrame, LLMResponseEndFrame
 from dailyai.pipeline.pipeline import Pipeline
 from dailyai.pipeline.frame_processor import FrameProcessor
 from dailyai.services.daily_transport_service import DailyTransportService
 from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
 from dailyai.services.open_ai_services import OpenAILLMService, OpenAIVisionService
 from dailyai.services.fal_ai_services import FalImageGenService
 from dailyai.services.deepgram_ai_services import DeepgramTTSService
 from dailyai.services.ai_services import FrameLogger
 from dailyai.pipeline.aggregators import (
    LLMAssistantContextAggregator,
    LLMUserContextAggregator,
 )
 from dailyai.pipeline.frames import VideoImageFrame, VisionFrame
 from examples.support.runner import configure
 # Plain string: the format only holds logging's own %-placeholders, so
 # there is nothing for an f-string to interpolate.
 logging.basicConfig(format="%(levelno)s %(asctime)s %(message)s")
 logger = logging.getLogger("dailyai")
 logger.setLevel(logging.DEBUG)
 class VideoImageFrameProcessor(FrameProcessor):
    """Replaces incoming video images with one-sentence description requests."""

    def __init__(self):
        # Nothing to set up; the base-class initializer is not invoked here.
        pass

    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        """Map a VideoImageFrame to a VisionFrame; leave other frames as-is."""
        is_video_image = isinstance(frame, VideoImageFrame)
        yield (
            VisionFrame("Describe the image in one sentence.", frame.image)
            if is_video_image
            else frame
        )
 class ImageRefresher(FrameProcessor):
    """Asks the transport for a fresh video image after each LLM response."""

    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        """Pass every frame on; emit an image request first when a response ends."""
        response_finished = isinstance(frame, LLMResponseEndFrame)
        if response_finished:
            # None means "all participants" for RequestVideoImageFrame.
            yield RequestVideoImageFrame(participantId=None)
        yield frame
 async def main(room_url: str, token):
    """Run the telestrator describer bot in a Daily room.

    Video images received from the transport are turned into VisionFrames
    asking for a one-sentence description and passed through the vision and
    TTS services; ImageRefresher then requests the next image.

    Args:
        room_url: URL of the Daily room to join.
        token: token passed to the transport along with the room URL.
    """
    async with aiohttp.ClientSession() as session:
        transport = DailyTransportService(
            room_url,
            token,
            "Respond bot",
            duration_minutes=5,
            start_transcription=True,
            mic_enabled=True,
            mic_sample_rate=16000,
            camera_enabled=True,
            camera_width=1024,
            camera_height=1024,
            vad_enabled=False,
            receive_video=True,
            # 0 disables timed frame sending; frames are requested explicitly
            # with RequestVideoImageFrame instead.
            receive_video_fps=0
        )
        tts = ElevenLabsTTSService(
            aiohttp_session=session,
            api_key=os.getenv("ELEVENLABS_API_KEY"),
            # ElevenLabsTTSService takes a keyword-only `narrator` and reads
            # narrator["narrator"]["voice_id"] when building its request URL;
            # it has no `voice_id` parameter.
            narrator={
                "narrator": {"voice_id": os.getenv("ELEVENLABS_VOICE_ID")}
            },
        )
        # NOTE(review): llm is constructed but not placed in the pipeline below.
        llm = OpenAILLMService(
            api_key=os.getenv("OPENAI_CHATGPT_API_KEY"),
            model="gpt-4-turbo-preview")
        vs = OpenAIVisionService(api_key=os.getenv("OPENAI_CHATGPT_API_KEY"))
        vifp = VideoImageFrameProcessor()
        ir = ImageRefresher()
        pipeline = Pipeline(
            processors=[
                vifp,
                vs,
                tts,
                ir,
            ],
        )

        @transport.event_handler("on_first_other_participant_joined")
        async def on_first_other_participant_joined(transport):
            # Kick things off by asking for a first image from everyone.
            await pipeline.queue_frames([RequestVideoImageFrame(participantId=None)])

        transport.transcription_settings["extra"]["endpointing"] = True
        transport.transcription_settings["extra"]["punctuate"] = True
        await transport.run(pipeline)
 if __name__ == "__main__":
    # configure() supplies the room URL and token for this run.
    url, token = configure()
    asyncio.run(main(url, token))
--- a/src/examples/starter-apps/telestrator/illustrator.py
+++ b/src/examples/starter-apps/telestrator/illustrator.py
@@ -0,0 +1,112 @@
 import asyncio
 import aiohttp
 import logging
 import os
 from typing import AsyncGenerator
 from dailyai.pipeline.frames import Frame, LLMMessagesQueueFrame, RequestVideoImageFrame, LLMResponseEndFrame, UserStartedSpeakingFrame, UserStoppedSpeakingFrame, TranscriptionQueueFrame, TextFrame
 from dailyai.pipeline.pipeline import Pipeline
 from dailyai.pipeline.frame_processor import FrameProcessor
 from dailyai.services.daily_transport_service import DailyTransportService
 from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
 from dailyai.services.open_ai_services import OpenAILLMService, OpenAIVisionService
 from dailyai.services.fal_ai_services import FalImageGenService
 from dailyai.services.deepgram_ai_services import DeepgramTTSService
 from dailyai.services.ai_services import FrameLogger
 from dailyai.pipeline.aggregators import (
    LLMAssistantContextAggregator,
    LLMUserContextAggregator,
 )
 from dailyai.pipeline.frames import VideoImageFrame, VisionFrame
 from examples.support.runner import configure
 # Plain string: the format only holds logging's own %-placeholders, so
 # there is nothing for an f-string to interpolate.
 logging.basicConfig(format="%(levelno)s %(asctime)s %(message)s")
 logger = logging.getLogger("dailyai")
 logger.setLevel(logging.DEBUG)
 class VADAggregator(FrameProcessor):
    """Collects transcription text while the user is speaking.

    Text from TranscriptionQueueFrames seen between a UserStartedSpeakingFrame
    and a UserStoppedSpeakingFrame is accumulated and emitted as one TextFrame
    when the user stops. UserStartedSpeakingFrames are consumed, and a
    UserStoppedSpeakingFrame is only forwarded when some text was collected.
    """

    def __init__(self):
        self.aggregating = False
        self.aggregation = ""

    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        """Aggregate transcriptions during speech; pass other frames through."""
        if isinstance(frame, UserStartedSpeakingFrame):
            self.aggregating = True
            return

        if isinstance(frame, UserStoppedSpeakingFrame):
            self.aggregating = False
            # Sometimes VAD triggers quickly on and off. Without any
            # transcription nothing is emitted, which avoids creating empty
            # LLM message queue frames.
            if self.aggregation:
                yield TextFrame(self.aggregation)
                self.aggregation = ""
                yield frame
            return

        if self.aggregating and isinstance(frame, TranscriptionQueueFrame):
            self.aggregation = f"{self.aggregation} {frame.text}"
            return

        yield frame
 async def main(room_url: str, token):
    """Run the telestrator illustrator bot in a Daily room.

    Frames pass through VADAggregator, which emits the text transcribed while
    the user was speaking, and then through the image generation service.
    FrameLoggers print the frames seen before and after each stage.

    Args:
        room_url: URL of the Daily room to join.
        token: token passed to the transport along with the room URL.
    """
    async with aiohttp.ClientSession() as session:
        transport = DailyTransportService(
            room_url,
            token,
            "Respond bot",
            duration_minutes=5,
            start_transcription=True,
            mic_enabled=True,
            mic_sample_rate=16000,
            camera_enabled=True,
            camera_width=1024,
            camera_height=1024,
            vad_enabled=True,
            receive_video=True,
            # 0 disables timed frame sending; frames are requested explicitly
            # with RequestVideoImageFrame instead.
            receive_video_fps=0,
            vad_timeout_s=1.0
        )
        # NOTE(review): tts, llm and vs are constructed but not placed in the
        # pipeline below.
        tts = ElevenLabsTTSService(
            aiohttp_session=session,
            api_key=os.getenv("ELEVENLABS_API_KEY"),
            # ElevenLabsTTSService takes a keyword-only `narrator` and reads
            # narrator["narrator"]["voice_id"] when building its request URL;
            # it has no `voice_id` parameter.
            narrator={
                "narrator": {"voice_id": os.getenv("ELEVENLABS_VOICE_ID")}
            },
        )
        llm = OpenAILLMService(
            api_key=os.getenv("OPENAI_CHATGPT_API_KEY"),
            model="gpt-4-turbo-preview")
        vs = OpenAIVisionService(api_key=os.getenv("OPENAI_CHATGPT_API_KEY"))
        vad = VADAggregator()
        img = FalImageGenService(
            image_size="1024x1024",
            aiohttp_session=session,
            key_id=os.getenv("FAL_KEY_ID"),
            key_secret=os.getenv("FAL_KEY_SECRET"),
        )
        fl = FrameLogger("!!! Start")
        fl2 = FrameLogger("!!! AFTER VAD")
        fl3 = FrameLogger("!!! After img")
        pipeline = Pipeline(
            processors=[
                fl,
                vad,
                fl2,
                img,
                fl3
            ],
        )

        @transport.event_handler("on_first_other_participant_joined")
        async def on_first_other_participant_joined(transport):
            await pipeline.queue_frames([RequestVideoImageFrame(participantId=None)])

        transport.transcription_settings["extra"]["endpointing"] = True
        transport.transcription_settings["extra"]["punctuate"] = True
        await transport.run(pipeline)
 if __name__ == "__main__":
    # configure() supplies the room URL and token for this run.
    url, token = configure()
    asyncio.run(main(url, token))
--- a/src/examples/starter-apps/telestrator/telestrator-fuzz.py
+++ b/src/examples/starter-apps/telestrator/telestrator-fuzz.py
@@ -0,0 +1,210 @@
 import asyncio
 import aiohttp
 import logging
 import os
 import random
 from typing import AsyncGenerator
 from dailyai.pipeline.frames import Frame, LLMMessagesQueueFrame, RequestVideoImageFrame, LLMResponseEndFrame, TelestratorImageFrame, ImageFrame, TextFrame
 from dailyai.pipeline.pipeline import Pipeline
 from dailyai.pipeline.frame_processor import FrameProcessor
 from dailyai.services.daily_transport_service import DailyTransportService
 from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
 from dailyai.services.open_ai_services import OpenAILLMService, OpenAIVisionService
 from dailyai.services.fal_ai_services import FalImageGenService
 from dailyai.services.deepgram_ai_services import DeepgramTTSService
 from dailyai.services.ai_services import FrameLogger
 from dailyai.pipeline.aggregators import (
    LLMAssistantContextAggregator,
    LLMUserContextAggregator,
    LLMFullResponseAggregator
 )
 from dailyai.pipeline.frames import VideoImageFrame, VisionFrame
 from examples.support.runner import configure
 # Log records are formatted as "<levelno> <asctime> <message>"; the "dailyai"
 # logger is set to DEBUG.
 logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s")
 logger = logging.getLogger("dailyai")
 logger.setLevel(logging.DEBUG)
 # Each narrator pairs a voice id with the prompt used to describe an image.
 narrators = [
     {"voice_id": voice_id, "prompt": prompt}
     for voice_id, prompt in (
         ("wDRBdcyPzQOCeq51IxW5",
          "Describe the image in one sentence."),
         ("M3bAX0o3Ptb2l6XqwQJV",
          "Describe the image in one sentence, in the style of John Oliver's Last Week Tonight show."),
         ("lJm5d2ZZ3UE4qYOxl2t7",
          "Describe the image in one sentence, in the style of Oprah Winfrey."),
         ("7SNUlQ8GAbnZxRO9CKOt",
          "Describe the image in one sentence, in the style of a royal pronouncement by the Queen of England."),
         ("gvpBhHjzfd7M2WedYVUI",
          "Describe the image in one sentence, in the style of Captain Picard from Star Trek."),
         ("bnyr1EF3snReVXauGBNn",
          "Describe the image in one sentence, in the style of Maya Angelou."),
     )
 ]
 # The list is deliberately left unshuffled, so narrators[0] is the starting narrator.
 print(f"$$$ narrators: {narrators}")
 # Mutable holder so every component that receives it sees the current narrator.
 narrator = {"narrator": narrators[0]}
 class TranslationProcessor(FrameProcessor):
    """Turn each TextFrame into an LLM translation request.

    Text frames are replaced by an LLMMessagesQueueFrame holding a system
    instruction (translate from ``in_language`` to ``out_language``) and the
    frame's text as the user message. All other frames pass through.
    """
    def __init__(self, in_language, out_language):
        self._in_language, self._out_language = in_language, out_language
    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        # Anything that is not text is forwarded untouched.
        if not isinstance(frame, TextFrame):
            yield frame
            return
        system_message = {
            "role": "system",
            "content": f"You will be provided with a sentence in {self._in_language}, and your task is to translate it into {self._out_language}.",
        }
        user_message = {"role": "user", "content": frame.text}
        yield LLMMessagesQueueFrame([system_message, user_message])
 class NarratorShuffle(FrameProcessor):
    """Advance the shared narrator each time an image frame goes by.

    ``narrator`` is a dict whose "narrator" entry is overwritten in place;
    ``narrators`` is the list stepped through. When the index runs past the
    end, the list is reshuffled in place and the index restarts at 0.
    Every frame is forwarded unchanged.
    """
    def __init__(self, narrator, narrators):
        self._narrator = narrator
        self._narrators = narrators
        self._i = 0
    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        if isinstance(frame, (ImageFrame, TelestratorImageFrame)):
            next_index = self._i + 1
            if next_index >= len(self._narrators):
                # Exhausted the current ordering: reshuffle and start over.
                print(f"### shuffling narrators")
                random.shuffle(self._narrators)
                next_index = 0
            self._i = next_index
            self._narrator["narrator"] = self._narrators[next_index]
            print(f"### new narrator is {self._narrator}")
        yield frame
 class VideoImageFrameProcessor(FrameProcessor):
    """Convert incoming image frames into VisionFrames.

    VideoImageFrame and TelestratorImageFrame instances are replaced by a
    VisionFrame built from the current narrator's prompt and the frame's
    image. All other frames pass through.
    """
    def __init__(self, narrator):
        self._narrator = narrator
    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        if not isinstance(frame, (VideoImageFrame, TelestratorImageFrame)):
            yield frame
            return
        # The prompt is looked up per frame, so narrator changes take effect immediately.
        prompt = self._narrator["narrator"]["prompt"]
        yield VisionFrame(prompt, frame.image)
 class ImageRefresher(FrameProcessor):
    """Emit a RequestVideoImageFrame ahead of every LLMResponseEndFrame.

    Every incoming frame is forwarded; an LLMResponseEndFrame is preceded by
    a RequestVideoImageFrame with ``participantId=None``.
    """
    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        if isinstance(frame, LLMResponseEndFrame):
            yield RequestVideoImageFrame(participantId=None)
        yield frame
 class TelestratorImageWrapper(FrameProcessor):
    """Re-wrap ImageFrames as TelestratorImageFrames.

    The new frame gets ``None`` as its first positional field and the
    original frame's ``image``. Non-image frames pass through unchanged.
    """
    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        if not isinstance(frame, ImageFrame):
            yield frame
            return
        yield TelestratorImageFrame(None, frame.image)
 async def main(room_url: str, token):
    """Run the "fuzz" telestrator bot against a Daily room.

    The pipeline turns a participant image into a VisionFrame, sends it
    through the vision service and TTS, then passes the text through four
    translation stages (English -> Spanish -> German -> Japanese -> English),
    each followed by the LLM, a full-response aggregator and TTS. The final
    text goes to the image generator, and the generated image is re-wrapped
    as a TelestratorImageFrame.

    Args:
        room_url: URL of the Daily room to join.
        token: Token passed to the transport alongside the room URL.
    """
    async with aiohttp.ClientSession() as session:
        transport = DailyTransportService(
            room_url,
            token,
            "Respond bot",
            duration_minutes=5,
            start_transcription=True,
            mic_enabled=True,
            mic_sample_rate=16000,
            camera_enabled=True,
            camera_width=1024,
            camera_height=576,
            vad_enabled=False,
            receive_video=True,
            receive_video_fps=0
        )
        # TTS receives the shared `narrator` holder defined at module level.
        tts = ElevenLabsTTSService(
            aiohttp_session=session,
            api_key=os.getenv("ELEVENLABS_API_KEY"),
            narrator=narrator,
            aggregate_sentences=False
        )
        llm = OpenAILLMService(
            api_key=os.getenv("OPENAI_CHATGPT_API_KEY"),
            model="gpt-4-turbo-preview")
        vs = OpenAIVisionService(api_key=os.getenv("OPENAI_CHATGPT_API_KEY"))
        vifp = VideoImageFrameProcessor(narrator)
        img = FalImageGenService(
            image_size="1024x1024",
            aiohttp_session=session,
            key_id=os.getenv("FAL_KEY_ID"),
            key_secret=os.getenv("FAL_KEY_SECRET"),
        )
        tiw = TelestratorImageWrapper()
        # One aggregator per LLM/vision stage so their buffers stay separate.
        lfra = LLMFullResponseAggregator()
        lfra1 = LLMFullResponseAggregator()
        lfra2 = LLMFullResponseAggregator()
        lfra3 = LLMFullResponseAggregator()
        lfra4 = LLMFullResponseAggregator()
        fl0 = FrameLogger("@@@ About to describe")
        fl1 = FrameLogger("!!! About to image gen")
        f4 = FrameLogger("((( partway through )))")
        f5 = FrameLogger("!!! f5")
        # The translation chain that "fuzzes" the description.
        t1 = TranslationProcessor("English", "Spanish")
        t2 = TranslationProcessor("Spanish", "German")
        t3 = TranslationProcessor("German", "Japanese")
        t4 = TranslationProcessor("Japanese", "English")
        # NOTE: the same `tts` and `llm` instances appear at several positions.
        pipeline = Pipeline(
            processors=[
                fl0,
                vifp,
                vs,
                lfra,
                tts,
                f4,
                t1,
                llm,
                lfra1,
                f5,
                tts,
                t2,
                llm,
                lfra2,
                tts,
                t3,
                llm,
                lfra3,
                tts,
                t4,
                llm,
                lfra4,
                tts,
                fl1,
                img,
                tiw,
            ],
        )
        @transport.event_handler("on_first_other_participant_joined")
        async def on_first_other_participant_joined(transport):
            # Start the loop by queueing an image request with no participant specified.
            await pipeline.queue_frames([RequestVideoImageFrame(participantId=None)])
        transport.transcription_settings["extra"]["endpointing"] = True
        transport.transcription_settings["extra"]["punctuate"] = True
        await transport.run(pipeline)
 if __name__ == "__main__":
    # configure() returns the two values main() takes: room URL and token.
    room_url, room_token = configure()
    asyncio.run(main(room_url, room_token))
--- a/src/examples/starter-apps/telestrator/telestrator-haiku.py
+++ b/src/examples/starter-apps/telestrator/telestrator-haiku.py
@@ -0,0 +1,191 @@
 import asyncio
 import aiohttp
 import logging
 import os
 import random
 from typing import AsyncGenerator
 from dailyai.pipeline.frames import Frame, LLMMessagesQueueFrame, RequestVideoImageFrame, LLMResponseEndFrame, TelestratorImageFrame, ImageFrame, TextFrame
 from dailyai.pipeline.pipeline import Pipeline
 from dailyai.pipeline.frame_processor import FrameProcessor
 from dailyai.services.daily_transport_service import DailyTransportService
 from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
 from dailyai.services.open_ai_services import OpenAILLMService, OpenAIVisionService
 from dailyai.services.fal_ai_services import FalImageGenService
 from dailyai.services.deepgram_ai_services import DeepgramTTSService
 from dailyai.services.ai_services import FrameLogger
 from dailyai.pipeline.aggregators import (
    LLMAssistantContextAggregator,
    LLMUserContextAggregator,
    LLMFullResponseAggregator
 )
 from dailyai.pipeline.frames import VideoImageFrame, VisionFrame
 from examples.support.runner import configure
 # Log records are formatted as "<levelno> <asctime> <message>"; the "dailyai"
 # logger is set to DEBUG.
 logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s")
 logger = logging.getLogger("dailyai")
 logger.setLevel(logging.DEBUG)
 # Each narrator pairs a voice id with the prompt used to describe an image.
 narrators = [
     {"voice_id": voice_id, "prompt": prompt}
     for voice_id, prompt in (
         ("wDRBdcyPzQOCeq51IxW5",
          "Describe the image in a haiku."),
         ("M3bAX0o3Ptb2l6XqwQJV",
          "Describe the image in one sentence, in the style of John Oliver's Last Week Tonight show."),
         ("lJm5d2ZZ3UE4qYOxl2t7",
          "Describe the image in one sentence, in the style of Oprah Winfrey."),
         ("7SNUlQ8GAbnZxRO9CKOt",
          "Describe the image in one sentence, in the style of a royal pronouncement by the Queen of England."),
         ("gvpBhHjzfd7M2WedYVUI",
          "Describe the image in one sentence, in the style of Captain Picard from Star Trek."),
         ("bnyr1EF3snReVXauGBNn",
          "Describe the image in one sentence, in the style of Maya Angelou."),
     )
 ]
 # The list is deliberately left unshuffled, so narrators[0] is the starting narrator.
 print(f"$$$ narrators: {narrators}")
 # Mutable holder so every component that receives it sees the current narrator.
 narrator = {"narrator": narrators[0]}
 class TranslationProcessor(FrameProcessor):
    """Turn each TextFrame into an LLM translation request.

    Text frames are replaced by an LLMMessagesQueueFrame holding a system
    instruction (translate from ``in_language`` to ``out_language``) and the
    frame's text as the user message. All other frames pass through.
    """
    def __init__(self, in_language, out_language):
        self._in_language, self._out_language = in_language, out_language
    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        # Anything that is not text is forwarded untouched.
        if not isinstance(frame, TextFrame):
            yield frame
            return
        system_message = {
            "role": "system",
            "content": f"You will be provided with a sentence in {self._in_language}, and your task is to translate it into {self._out_language}.",
        }
        user_message = {"role": "user", "content": frame.text}
        yield LLMMessagesQueueFrame([system_message, user_message])
 class NarratorShuffle(FrameProcessor):
    """Advance the shared narrator each time an image frame goes by.

    ``narrator`` is a dict whose "narrator" entry is overwritten in place;
    ``narrators`` is the list stepped through. When the index runs past the
    end, the list is reshuffled in place and the index restarts at 0.
    Every frame is forwarded unchanged.
    """
    def __init__(self, narrator, narrators):
        self._narrator = narrator
        self._narrators = narrators
        self._i = 0
    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        if isinstance(frame, (ImageFrame, TelestratorImageFrame)):
            next_index = self._i + 1
            if next_index >= len(self._narrators):
                # Exhausted the current ordering: reshuffle and start over.
                print(f"### shuffling narrators")
                random.shuffle(self._narrators)
                next_index = 0
            self._i = next_index
            self._narrator["narrator"] = self._narrators[next_index]
            print(f"### new narrator is {self._narrator}")
        yield frame
 class VideoImageFrameProcessor(FrameProcessor):
    """Convert incoming image frames into VisionFrames.

    VideoImageFrame and TelestratorImageFrame instances are replaced by a
    VisionFrame built from the current narrator's prompt and the frame's
    image. All other frames pass through.
    """
    def __init__(self, narrator):
        self._narrator = narrator
    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        if not isinstance(frame, (VideoImageFrame, TelestratorImageFrame)):
            yield frame
            return
        # The prompt is looked up per frame, so narrator changes take effect immediately.
        prompt = self._narrator["narrator"]["prompt"]
        yield VisionFrame(prompt, frame.image)
 class ImageRefresher(FrameProcessor):
    """Emit a RequestVideoImageFrame ahead of every LLMResponseEndFrame.

    Every incoming frame is forwarded; an LLMResponseEndFrame is preceded by
    a RequestVideoImageFrame with ``participantId=None``.
    """
    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        if isinstance(frame, LLMResponseEndFrame):
            yield RequestVideoImageFrame(participantId=None)
        yield frame
 class TelestratorImageWrapper(FrameProcessor):
    """Re-wrap ImageFrames as TelestratorImageFrames.

    The new frame gets ``None`` as its first positional field and the
    original frame's ``image``. Non-image frames pass through unchanged.
    """
    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        if not isinstance(frame, ImageFrame):
            yield frame
            return
        yield TelestratorImageFrame(None, frame.image)
 async def main(room_url: str, token):
    """Run the haiku telestrator bot against a Daily room.

    Pipeline order: frame logger -> VideoImageFrameProcessor ->
    OpenAIVisionService -> LLMFullResponseAggregator -> ElevenLabsTTSService
    -> frame logger -> FalImageGenService -> TelestratorImageWrapper.

    Args:
        room_url: URL of the Daily room to join.
        token: Token passed to the transport alongside the room URL.
    """
    async with aiohttp.ClientSession() as session:
        transport = DailyTransportService(
            room_url,
            token,
            "Respond bot",
            duration_minutes=5,
            start_transcription=True,
            mic_enabled=True,
            mic_sample_rate=16000,
            camera_enabled=True,
            camera_width=1024,
            camera_height=1024,
            vad_enabled=False,
            receive_video=True,
            receive_video_fps=0
        )
        # TTS receives the shared `narrator` holder defined at module level.
        tts = ElevenLabsTTSService(
            aiohttp_session=session,
            api_key=os.getenv("ELEVENLABS_API_KEY"),
            narrator=narrator,
            aggregate_sentences=False
        )
        vs = OpenAIVisionService(api_key=os.getenv("OPENAI_CHATGPT_API_KEY"))
        vifp = VideoImageFrameProcessor(narrator)
        img = FalImageGenService(
            image_size="1024x1024",
            aiohttp_session=session,
            key_id=os.getenv("FAL_KEY_ID"),
            key_secret=os.getenv("FAL_KEY_SECRET"),
        )
        tiw = TelestratorImageWrapper()
        lfra = LLMFullResponseAggregator()
        fl0 = FrameLogger("@@@ About to describe")
        fl1 = FrameLogger("!!! About to image gen")
        pipeline = Pipeline(
            processors=[
                fl0,
                vifp,
                vs,
                lfra,
                tts,
                fl1,
                img,
                tiw,
            ],
        )
        @transport.event_handler("on_first_other_participant_joined")
        async def on_first_other_participant_joined(transport):
            # Start the loop by queueing an image request with no participant specified.
            await pipeline.queue_frames([RequestVideoImageFrame(participantId=None)])
        transport.transcription_settings["extra"]["endpointing"] = True
        transport.transcription_settings["extra"]["punctuate"] = True
        await transport.run(pipeline)
 if __name__ == "__main__":
    # configure() returns the two values main() takes: room URL and token.
    room_url, room_token = configure()
    asyncio.run(main(room_url, room_token))
--- a/src/examples/starter-apps/telestrator/telestrator-wordcount.py
+++ b/src/examples/starter-apps/telestrator/telestrator-wordcount.py
@@ -0,0 +1,191 @@
 import asyncio
 import aiohttp
 import logging
 import os
 import random
 from typing import AsyncGenerator
 from dailyai.pipeline.frames import Frame, LLMMessagesQueueFrame, RequestVideoImageFrame, LLMResponseEndFrame, TelestratorImageFrame, ImageFrame, TextFrame
 from dailyai.pipeline.pipeline import Pipeline
 from dailyai.pipeline.frame_processor import FrameProcessor
 from dailyai.services.daily_transport_service import DailyTransportService
 from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
 from dailyai.services.open_ai_services import OpenAILLMService, OpenAIVisionService
 from dailyai.services.fal_ai_services import FalImageGenService
 from dailyai.services.deepgram_ai_services import DeepgramTTSService
 from dailyai.services.ai_services import FrameLogger
 from dailyai.pipeline.aggregators import (
    LLMAssistantContextAggregator,
    LLMUserContextAggregator,
    LLMFullResponseAggregator
 )
 from dailyai.pipeline.frames import VideoImageFrame, VisionFrame
 from examples.support.runner import configure
 # Log records are formatted as "<levelno> <asctime> <message>"; the "dailyai"
 # logger is set to DEBUG.
 logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s")
 logger = logging.getLogger("dailyai")
 logger.setLevel(logging.DEBUG)
 # Each narrator pairs a voice id with the prompt used to describe an image.
 narrators = [
     {"voice_id": voice_id, "prompt": prompt}
     for voice_id, prompt in (
         ("wDRBdcyPzQOCeq51IxW5",
          "Describe the image in nine words."),
         ("M3bAX0o3Ptb2l6XqwQJV",
          "Describe the image in one sentence, in the style of John Oliver's Last Week Tonight show."),
         ("lJm5d2ZZ3UE4qYOxl2t7",
          "Describe the image in one sentence, in the style of Oprah Winfrey."),
         ("7SNUlQ8GAbnZxRO9CKOt",
          "Describe the image in one sentence, in the style of a royal pronouncement by the Queen of England."),
         ("gvpBhHjzfd7M2WedYVUI",
          "Describe the image in one sentence, in the style of Captain Picard from Star Trek."),
         ("bnyr1EF3snReVXauGBNn",
          "Describe the image in one sentence, in the style of Maya Angelou."),
     )
 ]
 # The list is deliberately left unshuffled, so narrators[0] is the starting narrator.
 print(f"$$$ narrators: {narrators}")
 # Mutable holder so every component that receives it sees the current narrator.
 narrator = {"narrator": narrators[0]}
 class TranslationProcessor(FrameProcessor):
    """Turn each TextFrame into an LLM translation request.

    Text frames are replaced by an LLMMessagesQueueFrame holding a system
    instruction (translate from ``in_language`` to ``out_language``) and the
    frame's text as the user message. All other frames pass through.
    """
    def __init__(self, in_language, out_language):
        self._in_language, self._out_language = in_language, out_language
    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        # Anything that is not text is forwarded untouched.
        if not isinstance(frame, TextFrame):
            yield frame
            return
        system_message = {
            "role": "system",
            "content": f"You will be provided with a sentence in {self._in_language}, and your task is to translate it into {self._out_language}.",
        }
        user_message = {"role": "user", "content": frame.text}
        yield LLMMessagesQueueFrame([system_message, user_message])
 class NarratorShuffle(FrameProcessor):
    """Advance the shared narrator each time an image frame goes by.

    ``narrator`` is a dict whose "narrator" entry is overwritten in place;
    ``narrators`` is the list stepped through. When the index runs past the
    end, the list is reshuffled in place and the index restarts at 0.
    Every frame is forwarded unchanged.
    """
    def __init__(self, narrator, narrators):
        self._narrator = narrator
        self._narrators = narrators
        self._i = 0
    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        if isinstance(frame, (ImageFrame, TelestratorImageFrame)):
            next_index = self._i + 1
            if next_index >= len(self._narrators):
                # Exhausted the current ordering: reshuffle and start over.
                print(f"### shuffling narrators")
                random.shuffle(self._narrators)
                next_index = 0
            self._i = next_index
            self._narrator["narrator"] = self._narrators[next_index]
            print(f"### new narrator is {self._narrator}")
        yield frame
 class VideoImageFrameProcessor(FrameProcessor):
    """Convert incoming image frames into VisionFrames.

    VideoImageFrame and TelestratorImageFrame instances are replaced by a
    VisionFrame built from the current narrator's prompt and the frame's
    image. All other frames pass through.
    """
    def __init__(self, narrator):
        self._narrator = narrator
    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        if not isinstance(frame, (VideoImageFrame, TelestratorImageFrame)):
            yield frame
            return
        # The prompt is looked up per frame, so narrator changes take effect immediately.
        prompt = self._narrator["narrator"]["prompt"]
        yield VisionFrame(prompt, frame.image)
 class ImageRefresher(FrameProcessor):
    """Emit a RequestVideoImageFrame ahead of every LLMResponseEndFrame.

    Every incoming frame is forwarded; an LLMResponseEndFrame is preceded by
    a RequestVideoImageFrame with ``participantId=None``.
    """
    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        if isinstance(frame, LLMResponseEndFrame):
            yield RequestVideoImageFrame(participantId=None)
        yield frame
 class TelestratorImageWrapper(FrameProcessor):
    """Re-wrap ImageFrames as TelestratorImageFrames.

    The new frame gets ``None`` as its first positional field and the
    original frame's ``image``. Non-image frames pass through unchanged.
    """
    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        if not isinstance(frame, ImageFrame):
            yield frame
            return
        yield TelestratorImageFrame(None, frame.image)
 async def main(room_url: str, token):
    """Run the word-count telestrator bot against a Daily room.

    Pipeline order: frame logger -> VideoImageFrameProcessor ->
    OpenAIVisionService -> LLMFullResponseAggregator -> ElevenLabsTTSService
    -> frame logger -> FalImageGenService -> TelestratorImageWrapper.

    Args:
        room_url: URL of the Daily room to join.
        token: Token passed to the transport alongside the room URL.
    """
    async with aiohttp.ClientSession() as session:
        transport = DailyTransportService(
            room_url,
            token,
            "Respond bot",
            duration_minutes=5,
            start_transcription=True,
            mic_enabled=True,
            mic_sample_rate=16000,
            camera_enabled=True,
            camera_width=1024,
            camera_height=1024,
            vad_enabled=False,
            receive_video=True,
            receive_video_fps=0
        )
        # TTS receives the shared `narrator` holder defined at module level.
        tts = ElevenLabsTTSService(
            aiohttp_session=session,
            api_key=os.getenv("ELEVENLABS_API_KEY"),
            narrator=narrator,
            aggregate_sentences=False
        )
        vs = OpenAIVisionService(api_key=os.getenv("OPENAI_CHATGPT_API_KEY"))
        vifp = VideoImageFrameProcessor(narrator)
        img = FalImageGenService(
            image_size="1024x1024",
            aiohttp_session=session,
            key_id=os.getenv("FAL_KEY_ID"),
            key_secret=os.getenv("FAL_KEY_SECRET"),
        )
        tiw = TelestratorImageWrapper()
        lfra = LLMFullResponseAggregator()
        fl0 = FrameLogger("@@@ About to describe")
        fl1 = FrameLogger("!!! About to image gen")
        pipeline = Pipeline(
            processors=[
                fl0,
                vifp,
                vs,
                lfra,
                tts,
                fl1,
                img,
                tiw,
            ],
        )
        @transport.event_handler("on_first_other_participant_joined")
        async def on_first_other_participant_joined(transport):
            # Start the loop by queueing an image request with no participant specified.
            await pipeline.queue_frames([RequestVideoImageFrame(participantId=None)])
        transport.transcription_settings["extra"]["endpointing"] = True
        transport.transcription_settings["extra"]["punctuate"] = True
        await transport.run(pipeline)
 if __name__ == "__main__":
    # configure() returns the two values main() takes: room URL and token.
    room_url, room_token = configure()
    asyncio.run(main(room_url, room_token))
--- a/src/examples/starter-apps/telestrator/telestrator.py
+++ b/src/examples/starter-apps/telestrator/telestrator.py
@@ -0,0 +1,162 @@
 import asyncio
 import aiohttp
 import logging
 import os
 import random
 from typing import AsyncGenerator
 from dailyai.pipeline.frames import Frame, LLMMessagesQueueFrame, RequestVideoImageFrame, LLMResponseEndFrame, TelestratorImageFrame, ImageFrame
 from dailyai.pipeline.pipeline import Pipeline
 from dailyai.pipeline.frame_processor import FrameProcessor
 from dailyai.services.daily_transport_service import DailyTransportService
 from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
 from dailyai.services.open_ai_services import OpenAILLMService, OpenAIVisionService
 from dailyai.services.fal_ai_services import FalImageGenService
 from dailyai.services.deepgram_ai_services import DeepgramTTSService
 from dailyai.services.ai_services import FrameLogger
 from dailyai.pipeline.aggregators import (
    LLMAssistantContextAggregator,
    LLMUserContextAggregator,
    LLMFullResponseAggregator
 )
 from dailyai.pipeline.frames import VideoImageFrame, VisionFrame
 from examples.support.runner import configure
 # Log records are formatted as "<levelno> <asctime> <message>"; the "dailyai"
 # logger is set to DEBUG.
 logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s")
 logger = logging.getLogger("dailyai")
 logger.setLevel(logging.DEBUG)
 # Each narrator pairs a voice id with the prompt used to describe an image.
 narrators = [
     {"voice_id": voice_id, "prompt": prompt}
     for voice_id, prompt in (
         ("wDRBdcyPzQOCeq51IxW5",
          "Describe the image in one sentence, in the style of David Attenborough."),
         ("M3bAX0o3Ptb2l6XqwQJV",
          "Describe the image in one sentence, in the style of John Oliver's Last Week Tonight show."),
         ("lJm5d2ZZ3UE4qYOxl2t7",
          "Describe the image in one sentence, in the style of Oprah Winfrey."),
         ("7SNUlQ8GAbnZxRO9CKOt",
          "Describe the image in one sentence, in the style of a royal pronouncement by the Queen of England."),
         ("gvpBhHjzfd7M2WedYVUI",
          "Describe the image in one sentence, in the style of Captain Picard from Star Trek."),
         ("bnyr1EF3snReVXauGBNn",
          "Describe the image in one sentence, in the style of Maya Angelou."),
     )
 ]
 # Randomize the order once at import so each run starts with a different narrator.
 random.shuffle(narrators)
 print(f"$$$ narrators: {narrators}")
 # Mutable holder so every component that receives it sees the current narrator.
 narrator = {"narrator": narrators[0]}
 class NarratorShuffle(FrameProcessor):
    """Advance the shared narrator each time an image frame goes by.

    ``narrator`` is a dict whose "narrator" entry is overwritten in place;
    ``narrators`` is the list stepped through. When the index runs past the
    end, the list is reshuffled in place and the index restarts at 0.
    Every frame is forwarded unchanged.
    """
    def __init__(self, narrator, narrators):
        self._narrator = narrator
        self._narrators = narrators
        self._i = 0
    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        if isinstance(frame, (ImageFrame, TelestratorImageFrame)):
            next_index = self._i + 1
            if next_index >= len(self._narrators):
                # Exhausted the current ordering: reshuffle and start over.
                print(f"### shuffling narrators")
                random.shuffle(self._narrators)
                next_index = 0
            self._i = next_index
            self._narrator["narrator"] = self._narrators[next_index]
            print(f"### new narrator is {self._narrator}")
        yield frame
 class VideoImageFrameProcessor(FrameProcessor):
    """Convert incoming image frames into VisionFrames.

    VideoImageFrame and TelestratorImageFrame instances are replaced by a
    VisionFrame built from the current narrator's prompt and the frame's
    image. All other frames pass through.
    """
    def __init__(self, narrator):
        self._narrator = narrator
    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        if not isinstance(frame, (VideoImageFrame, TelestratorImageFrame)):
            yield frame
            return
        # The prompt is looked up per frame, so narrator changes take effect immediately.
        prompt = self._narrator["narrator"]["prompt"]
        yield VisionFrame(prompt, frame.image)
 class ImageRefresher(FrameProcessor):
    """Emit a RequestVideoImageFrame ahead of every LLMResponseEndFrame.

    Every incoming frame is forwarded; an LLMResponseEndFrame is preceded by
    a RequestVideoImageFrame with ``participantId=None``.
    """
    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        if isinstance(frame, LLMResponseEndFrame):
            yield RequestVideoImageFrame(participantId=None)
        yield frame
 class TelestratorImageWrapper(FrameProcessor):
    """Re-wrap ImageFrames as TelestratorImageFrames.

    The new frame gets ``None`` as its first positional field and the
    original frame's ``image``. Non-image frames pass through unchanged.
    """
    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        if not isinstance(frame, ImageFrame):
            yield frame
            return
        yield TelestratorImageFrame(None, frame.image)
 async def main(room_url: str, token):
    """Run the telestrator bot against a Daily room.

    Pipeline order: NarratorShuffle -> frame logger ->
    VideoImageFrameProcessor -> OpenAIVisionService ->
    LLMFullResponseAggregator -> ElevenLabsTTSService -> frame logger ->
    FalImageGenService -> TelestratorImageWrapper.

    Args:
        room_url: URL of the Daily room to join.
        token: Token passed to the transport alongside the room URL.
    """
    async with aiohttp.ClientSession() as session:
        transport = DailyTransportService(
            room_url,
            token,
            "Respond bot",
            duration_minutes=5,
            start_transcription=True,
            mic_enabled=True,
            mic_sample_rate=16000,
            camera_enabled=True,
            camera_width=1024,
            camera_height=1024,
            vad_enabled=False,
            receive_video=True,
            receive_video_fps=0
        )
        # TTS and the narrator-aware processors share the module-level
        # `narrator` holder, which NarratorShuffle mutates in place.
        tts = ElevenLabsTTSService(
            aiohttp_session=session,
            api_key=os.getenv("ELEVENLABS_API_KEY"),
            narrator=narrator,
            aggregate_sentences=False
        )
        vs = OpenAIVisionService(api_key=os.getenv("OPENAI_CHATGPT_API_KEY"))
        vifp = VideoImageFrameProcessor(narrator)
        img = FalImageGenService(
            image_size="1024x1024",
            aiohttp_session=session,
            key_id=os.getenv("FAL_KEY_ID"),
            key_secret=os.getenv("FAL_KEY_SECRET"),
        )
        tiw = TelestratorImageWrapper()
        lfra = LLMFullResponseAggregator()
        fl0 = FrameLogger("@@@ About to describe")
        fl1 = FrameLogger("!!! About to image gen")
        ns = NarratorShuffle(narrator, narrators)
        pipeline = Pipeline(
            processors=[
                ns,
                fl0,
                vifp,
                vs,
                lfra,
                tts,
                fl1,
                img,
                tiw,
            ],
        )
        @transport.event_handler("on_first_other_participant_joined")
        async def on_first_other_participant_joined(transport):
            # Start the loop by queueing an image request with no participant specified.
            await pipeline.queue_frames([RequestVideoImageFrame(participantId=None)])
        transport.transcription_settings["extra"]["endpointing"] = True
        transport.transcription_settings["extra"]["punctuate"] = True
        await transport.run(pipeline)
 if __name__ == "__main__":
    # configure() returns the two values main() takes: room URL and token.
    room_url, room_token = configure()
    asyncio.run(main(room_url, room_token))
--- a/src/examples/starter-apps/translator.py
+++ b/src/examples/starter-apps/translator.py
@@ -26,7 +26,8 @@ logger.setLevel(logging.DEBUG)
 """
 This example looks a bit different than the chatbot example, because it isn't waiting on the user to stop talking to start translating.
-It also isn't saving what the user or bot says into the context object for use in subsequent interactions.
+It also isn't saving what the user or bot says into the context object for use in subsequent interactions. This example also sends
 the translated text back to the transport as an App Message, so clients can show subtitles.
 """
@@ -50,6 +51,20 @@ class TranslationProcessor(FrameProcessor):
            yield frame
 class TranslationSubtitles(FrameProcessor):
    """Build a subtitle payload for each translated TextFrame.

    For every TextFrame, a dict with the configured ``language`` and the
    frame's ``text`` is assembled. All frames, TextFrames included, are
    forwarded downstream; previously TextFrames were silently dropped
    because the text branch never yielded anything.
    """
    def __init__(self, language):
        self._language = language
    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        if isinstance(frame, TextFrame):
            app_message = {
                "language": self._language,
                "text": frame.text
            }
            # TODO: hand `app_message` to the transport as an app message;
            # no frame type for that is visible from this module yet, so the
            # payload is built but not emitted.
        # Never swallow a frame: downstream processors still need it.
        yield frame
 async def main(room_url: str, token):
    async with aiohttp.ClientSession() as session:
        transport = DailyTransportService(
@@ -73,7 +88,8 @@ async def main(room_url: str, token):
            model="gpt-4-turbo-preview")
        sa = SentenceAggregator()
        tp = TranslationProcessor("Spanish")
-        pipeline = Pipeline([sa, tp, llm, tts])
+        ts = TranslationSubtitles("Spanish")
        pipeline = Pipeline([sa, tp, llm, tts, ts])
        transport.transcription_settings["extra"]["endpointing"] = True
        transport.transcription_settings["extra"]["punctuate"] = True
Author	SHA1	Message	Date
Chad Bailey	c73fb4750f	added fuzz example	2024-03-22 14:20:16 +00:00
Chad Bailey	34b10cb4c7	wip	2024-03-19 22:04:47 +00:00
Chad Bailey	e726f15c4e	wip: telestrator	2024-03-19 15:31:19 +00:00
Chad Bailey	25ca8b751e	cleanup	2024-03-19 03:08:04 +00:00
Chad Bailey	0b4b63d2ee	Working vision example	2024-03-19 01:51:36 +00:00
Chad Bailey	6c9425d66a	wip: video image frames	2024-03-18 22:14:02 +00:00
Chad Bailey	6d3c52ae81	added app message	2024-03-18 19:52:31 +00:00