diff --git a/src/dailyai/services/fal_ai_services.py b/src/dailyai/services/fal_ai_services.py index 324ff0ec4..8527cb168 100644 --- a/src/dailyai/services/fal_ai_services.py +++ b/src/dailyai/services/fal_ai_services.py @@ -9,12 +9,10 @@ from PIL import Image from dailyai.services.ai_services import LLMService, TTSService, ImageGenService # Fal expects FAL_KEY_ID and FAL_KEY_SECRET to be set in the env class FalImageGenService(ImageGenService): - def __init__(self): - super().__init__() + def __init__(self, image_size): + super().__init__(image_size) - - - async def run_image_gen(self, sentence, size) -> tuple[str, bytes]: + async def run_image_gen(self, sentence) -> tuple[str, bytes]: def get_image_url(sentence, size): print("starting fal submit...") handler = fal.apps.submit( @@ -37,7 +35,7 @@ class FalImageGenService(ImageGenService): return image_url print(f"fetching image url...") - image_url = await asyncio.to_thread(get_image_url, sentence, size) + image_url = await asyncio.to_thread(get_image_url, sentence, self.image_size) print(f"got image url, downloading image...") # Load the image from the url async with aiohttp.ClientSession() as session: @@ -48,4 +46,4 @@ class FalImageGenService(ImageGenService): image = Image.open(image_stream) return (image_url, image.tobytes()) - # return (image_url, dalle_im.tobytes()) \ No newline at end of file + # return (image_url, dalle_im.tobytes()) diff --git a/src/samples/theoretical-to-real/05-sync-speech-and-text.py b/src/samples/theoretical-to-real/05-sync-speech-and-text.py index a839d8385..b0cd1e7c7 100644 --- a/src/samples/theoretical-to-real/05-sync-speech-and-text.py +++ b/src/samples/theoretical-to-real/05-sync-speech-and-text.py @@ -27,9 +27,9 @@ async def main(room_url): transport.camera_height = 1024 llm = AzureLLMService() - dalle = FalImageGenService() + dalle = FalImageGenService(image_size="1024x1024") tts = ElevenLabsTTSService(voice_id="ErXwobaYiN019PkySvjV") - #dalle = OpenAIImageGenService(image_size="1024x1024") + # dalle = OpenAIImageGenService(image_size="1024x1024") # Get a complete audio chunk from the given text. Splitting this into its own # coroutine lets us ensure proper ordering of the audio chunks on the send queue.