Merge pull request #16 from daily-co/image_tweaks

Minor Cleanup
This commit is contained in:
Moishe Lettvin
2024-01-27 19:10:52 -05:00
committed by GitHub
8 changed files with 62 additions and 50 deletions

View File

@@ -32,7 +32,8 @@ class ImageQueueFrame(QueueFrame):
@dataclass()
class SpriteQueueFrame(QueueFrame):
images: list[bytes] | None
images: list[bytes]
@dataclass()
class TextQueueFrame(QueueFrame):

View File

@@ -101,7 +101,7 @@ class AzureImageGenServiceREST(ImageGenService):
def __init__(
self,
image_size: str,
aiohttp_session:aiohttp.ClientSession,
aiohttp_session: aiohttp.ClientSession,
api_key=None,
azure_endpoint=None,
api_version=None,

View File

@@ -1,5 +1,6 @@
import asyncio
import inspect
import itertools
import logging
import sys
import threading
@@ -8,7 +9,7 @@ import types
from functools import partial
from queue import Queue, Empty
from typing import AsyncGenerator
from typing import AsyncGenerator, Iterable
from dailyai.queue_frame import (
AudioQueueFrame,
@@ -62,7 +63,8 @@ class DailyTransportService(EventHandler):
# This queue is used to marshal frames from the async send queue to the thread that emits audio & video.
# We need this to maintain the asynchronous behavior of asyncio queues -- to give async functions
# a chance to run while waiting for queue items -- but also to maintain thread safety and have a threaded
# handler to send frames, to ensure that sending isn't subject to pauses in the async thread.
# handler to send frames, to ensure that sending isn't subject to pauses
# in the async thread.
self.threadsafe_send_queue = Queue()
self._is_interrupted = Event()
@@ -168,10 +170,9 @@ class DailyTransportService(EventHandler):
)
Daily.select_speaker_device("speaker")
self._image: bytes | None = None
self._images: list[bytes] | None = None
self._images = None
self._camera_thread = Thread(target=self.run_camera, daemon=True)
self._camera_thread = Thread(target=self._run_camera, daemon=True)
self._camera_thread.start()
self._logger.info("Starting frame consumer thread")
@@ -343,24 +344,18 @@ class DailyTransportService(EventHandler):
def on_transcription_started(self, status):
pass
def set_image(self, image: bytes):
self._image: bytes | None = image
self._images: list[bytes] | None = None
def set_images(self, images: list[bytes], start_frame=0):
self._images: list[bytes] | None = images
self._image = None
self._current_frame = start_frame
def run_camera(self):
def _set_image(self, image: bytes):
self._images = itertools.cycle([image])
def _set_images(self, images: list[bytes], start_frame=0):
self._images = itertools.cycle(images)
def _run_camera(self):
try:
while not self._stop_threads.is_set():
if self._image:
self.camera.write_frame(self._image)
if self._images:
this_frame = self._images[self._current_frame]
this_frame = next(self._images)
self.camera.write_frame(this_frame)
self._current_frame = (self._current_frame + 1) % len(self._images)
time.sleep(1.0 / 8) # 8 fps
except Exception as e:
@@ -402,9 +397,9 @@ class DailyTransportService(EventHandler):
self.mic.write_frames(bytes(b[:l]))
b = b[l:]
elif isinstance(frame, ImageQueueFrame):
self.set_image(frame.image)
self._set_image(frame.image)
elif isinstance(frame, SpriteQueueFrame):
self.set_images(frame.images)
self._set_images(frame.images)
elif len(b):
self.mic.write_frames(bytes(b))
b = bytearray()

View File

@@ -11,7 +11,7 @@ from dailyai.services.ai_services import LLMService, TTSService, ImageGenService
class FalImageGenService(ImageGenService):
def __init__(self, image_size, aiohttp_session:aiohttp.ClientSession):
def __init__(self, image_size, aiohttp_session: aiohttp.ClientSession):
super().__init__(image_size)
self._aiohttp_session = aiohttp_session

View File

@@ -16,7 +16,7 @@ from dailyai.services.fal_ai_services import FalImageGenService
class ImageSyncAggregator(AIService):
def __init__(self, speaking_path:str, waiting_path:str):
def __init__(self, speaking_path: str, waiting_path: str):
self._speaking_image = Image.open(speaking_path)
self._speaking_image_bytes = self._speaking_image.tobytes()
@@ -28,6 +28,7 @@ class ImageSyncAggregator(AIService):
yield frame
yield ImageQueueFrame(None, self._waiting_image_bytes)
async def main(room_url: str, token):
global transport
global llm

View File

@@ -7,21 +7,22 @@ import requests
import time
import urllib.parse
from dotenv import load_dotenv
from PIL import Image
load_dotenv()
from dailyai.services.daily_transport_service import DailyTransportService
from dailyai.services.azure_ai_services import AzureLLMService, AzureTTSService
from dailyai.services.azure_ai_services import AzureLLMService
from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
from dailyai.services.fal_ai_services import FalImageGenService
from dailyai.services.open_ai_services import OpenAIImageGenService
from dailyai.queue_aggregators import LLMContextAggregator
from dailyai.queue_frame import LLMMessagesQueueFrame, QueueFrame, TextQueueFrame, ImageQueueFrame, SpriteQueueFrame
from dailyai.queue_frame import (
QueueFrame,
TextQueueFrame,
ImageQueueFrame,
SpriteQueueFrame,
TranscriptionQueueFrame,
)
from dailyai.services.ai_services import AIService
from typing import AsyncGenerator, List
from typing import AsyncGenerator
sprites = {}
image_files = [
@@ -53,23 +54,30 @@ talking = [random.choice(talking_list) for x in range(30)]
talking_frame = SpriteQueueFrame(images=talking)
# TODO: Support "thinking" as soon as we get a valid transcript, while LLM is processing
thinking_list = [sprites['sc-think-1.png'], sprites['sc-think-2.png'], sprites['sc-think-3.png'], sprites['sc-think-4.png']]
thinking_list = [
sprites['sc-think-1.png'],
sprites['sc-think-2.png'],
sprites['sc-think-3.png'],
sprites['sc-think-4.png']]
thinking_frame = SpriteQueueFrame(images=thinking_list)
class TranscriptFilter(AIService):
def __init__(self, bot_participant_id=None):
self.bot_participant_id = bot_participant_id
async def process_frame(self, frame:QueueFrame) -> AsyncGenerator[QueueFrame, None]:
if frame.participantId != self.bot_participant_id:
yield frame
async def process_frame(self, frame: QueueFrame) -> AsyncGenerator[QueueFrame, None]:
if isinstance(frame, TranscriptionQueueFrame):
if frame.participantId != self.bot_participant_id:
yield frame
class NameCheckFilter(AIService):
def __init__(self, names=None):
self.names = names
self.sentence = ""
async def process_frame(self, frame:QueueFrame) -> AsyncGenerator[QueueFrame, None]:
async def process_frame(self, frame: QueueFrame) -> AsyncGenerator[QueueFrame, None]:
content: str = ""
# TODO: split up transcription by participant
@@ -86,6 +94,7 @@ class NameCheckFilter(AIService):
out = self.sentence
self.sentence = ""
class ImageSyncAggregator(AIService):
def __init__(self):
pass
@@ -95,7 +104,8 @@ class ImageSyncAggregator(AIService):
yield frame
yield quiet_frame
async def main(room_url:str, token):
async def main(room_url: str, token):
async with aiohttp.ClientSession() as session:
global transport
global llm
@@ -153,14 +163,11 @@ async def main(room_url:str, token):
async def starting_image():
await transport.send_queue.put(quiet_frame)
transport.transcription_settings["extra"]["punctuate"] = True
await asyncio.gather(transport.run(), handle_transcriptions(), starting_image())
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Simple Daily Bot Sample")
parser.add_argument(

View File

@@ -8,14 +8,21 @@ import os
load_dotenv()
def get_meeting_token(room_name, daily_api_key, token_expiry):
api_path = os.getenv('DAILY_API_PATH') or 'https://api.daily.co/v1'
if not token_expiry:
token_expiry = time.time() + 600
res = requests.post(f'{api_path}/meeting-tokens',
headers={'Authorization': f'Bearer {daily_api_key}'},
json={'properties': {'room_name': room_name, 'is_owner': True, 'exp': token_expiry}})
res = requests.post(
f'{api_path}/meeting-tokens',
headers={
'Authorization': f'Bearer {daily_api_key}'},
json={
'properties': {
'room_name': room_name,
'is_owner': True,
'exp': token_expiry}})
if res.status_code != 200:
return jsonify({'error': 'Unable to create meeting token', 'detail': res.text}), 500
meeting_token = res.json()['token']
@@ -23,4 +30,4 @@ def get_meeting_token(room_name, daily_api_key, token_expiry):
def get_room_name(room_url):
return urllib.parse.urlparse(room_url).path[1:]
return urllib.parse.urlparse(room_url).path[1:]

View File

@@ -16,6 +16,7 @@ CORS(app)
print(f"I loaded an environment, and my FAL_KEY_ID is {os.getenv('FAL_KEY_ID')}")
def start_bot(bot_path, args=None):
daily_api_key = os.getenv("DAILY_API_KEY")
api_path = os.getenv("DAILY_API_PATH") or "https://api.daily.co/v1"
@@ -77,7 +78,7 @@ def start_bot(bot_path, args=None):
if res.status_code == 200:
break
print(f"Took {attempts} attempts to join room {room_name}")
# Additional client config
config = {}
if os.getenv("CLIENT_VAD_TIMEOUT_SEC"):
@@ -85,7 +86,7 @@ def start_bot(bot_path, args=None):
else:
config['vad_timeout_sec'] = 1.5
#return jsonify({"room_url": room_url, "token": meeting_token, "config": config}), 200
# return jsonify({"room_url": room_url, "token": meeting_token, "config": config}), 200
return redirect(room_url, code=301)
@@ -96,4 +97,4 @@ def spin_up_kitty():
@app.route("/healthz")
def health_check():
return "ok", 200
return "ok", 200