173 lines
5.7 KiB
Python
173 lines
5.7 KiB
Python
import argparse
|
|
from email.mime import image
|
|
import logging
|
|
import os
|
|
import random
|
|
import requests
|
|
import time
|
|
import urllib.parse
|
|
|
|
from PIL import Image
|
|
|
|
from dailyai.async_processor.async_processor import (
|
|
ConversationProcessorCollection,
|
|
LLMResponse,
|
|
OrchestratorResponse
|
|
)
|
|
from dailyai.orchestrator import OrchestratorConfig, Orchestrator
|
|
from dailyai.queue_frame import QueueFrame, FrameType
|
|
from dailyai.message_handler.message_handler import MessageHandler
|
|
from dailyai.services.ai_services import AIServiceConfig
|
|
from dailyai.services.azure_ai_services import AzureImageGenService, AzureTTSService, AzureLLMService
|
|
|
|
class StaticSpriteResponse(OrchestratorResponse):
    """Orchestrator response that plays a single, pre-loaded sprite image.

    Subclasses select the sprite by assigning ``self.filename`` (a file name
    inside the ``sprites/`` directory next to this module) after calling
    ``super().__init__``.
    """

    def __init__(
        self,
        services,
        message_handler,
        output_queue
    ) -> None:
        super().__init__(services, message_handler, output_queue)
        # Pixel data of the sprite; populated by start_preparation().
        self.image_bytes: bytes | None = None
        # Sprite file name; override this in subclasses. This must be the
        # same attribute that start_preparation() reads.
        self.filename: str | None = None

    def start_preparation(self) -> None:
        """Load the sprite named by ``self.filename`` into ``self.image_bytes``.

        Raises:
            ValueError: If ``self.filename`` has not been set by a subclass.
        """
        if self.filename is None:
            raise ValueError(
                f"{type(self).__name__} must set self.filename before preparation"
            )

        full_path = os.path.join(os.path.dirname(__file__), "sprites/", self.filename)
        print(full_path)

        with Image.open(full_path) as img:
            # tobytes() yields the decoded pixel buffer, not the encoded file contents.
            self.image_bytes = img.tobytes()

    def do_play(self) -> None:
        """Put the loaded sprite on the output queue as an IMAGE frame."""
        self.output_queue.put(QueueFrame(FrameType.IMAGE, self.image_bytes))
|
|
|
|
|
|
class IntroSpriteResponse(StaticSpriteResponse):
    """Static sprite response backed by the ``intro.png`` sprite."""

    def __init__(
        self,
        services,
        message_handler,
        output_queue,
    ) -> None:
        # Let the static-sprite base class store its collaborators first.
        super().__init__(services, message_handler, output_queue)

        # Name of the sprite file this response uses.
        self.filename = "intro.png"
|
|
|
|
|
|
class WaitingSpriteResponse(StaticSpriteResponse):
    """Static sprite response backed by the ``waiting.png`` sprite."""

    def __init__(
        self,
        services,
        message_handler,
        output_queue,
    ) -> None:
        # Let the static-sprite base class store its collaborators first.
        super().__init__(services, message_handler, output_queue)

        # Name of the sprite file this response uses.
        self.filename = "waiting.png"
|
|
|
|
|
|
class AnimatedSpriteLLMResponse(LLMResponse):
    """LLM response that pairs every TTS audio frame with a randomly picked sprite."""

    def __init__(
        self,
        services,
        message_handler,
        output_queue,
    ) -> None:
        super().__init__(services, message_handler, output_queue)

        # Sprite files to pick from, and the pixel data loaded from them.
        self.filenames = ["talk-1.png", "talk-2.png"]
        self.image_bytes = []

    def start_preparation(self) -> None:
        """Run the parent's preparation, then load the pixel data of each sprite."""
        super().start_preparation()

        # The sprites live in a "sprites/" directory next to this module.
        sprite_dir = os.path.join(os.path.dirname(__file__), "sprites/")
        for sprite_name in self.filenames:
            sprite_path = os.path.join(sprite_dir, sprite_name)
            print(sprite_path)

            with Image.open(sprite_path) as sprite:
                pixels = sprite.tobytes()
                self.image_bytes.append(pixels)

    def get_frames_from_tts_response(self, audio_frame) -> list[QueueFrame]:
        """Return an AUDIO frame for ``audio_frame`` followed by a random IMAGE frame."""
        audio = QueueFrame(FrameType.AUDIO, audio_frame)
        sprite = QueueFrame(FrameType.IMAGE, random.choice(self.image_bytes))
        return [audio, sprite]
|
|
|
|
|
|
def add_bot_to_room(room_url, token, expiration) -> None:
    """Run the sprite bot in a room and release its resources afterwards.

    Builds the message handler, the Azure-backed services, and the sprite
    conversation processors, starts an ``Orchestrator`` with them, and then
    shuts everything down, even when the orchestrator raises.

    Args:
        room_url: Room URL handed to ``OrchestratorConfig``.
        token: Meeting token handed to ``OrchestratorConfig``.
        expiration: Expiration value handed to ``OrchestratorConfig``.
    """

    # A simple prompt for a simple sample.
    message_handler = MessageHandler(
        """
        You are a sample bot in a WebRTC session. You'll receive input as transcriptions of user's
        speech, and your responses will be converted to audio via a TTS service.
        Answer user's questions and be friendly, and if you can, give some ideas about how someone
        could use a bot like you in a more in-depth way. Because your responses will be spoken,
        try to keep them short.
    """
    )

    # Use Azure services for the TTS, image generation, and LLM.
    # Note that you'll need to set the following environment variables:
    # - AZURE_SPEECH_SERVICE_KEY
    # - AZURE_SPEECH_SERVICE_REGION
    # - AZURE_CHATGPT_KEY
    # - AZURE_CHATGPT_ENDPOINT
    # - AZURE_CHATGPT_DEPLOYMENT_ID
    #
    # This demo doesn't use image generation, but if you extend it to do so,
    # you'll also need to set:
    # - AZURE_DALLE_KEY
    # - AZURE_DALLE_ENDPOINT
    # - AZURE_DALLE_DEPLOYMENT_ID

    services = AIServiceConfig(
        tts=AzureTTSService(), image=AzureImageGenService(), llm=AzureLLMService()
    )

    sprite_conversation_processors = ConversationProcessorCollection(
        introduction=IntroSpriteResponse,
        waiting=WaitingSpriteResponse,
        response=AnimatedSpriteLLMResponse,
    )

    orchestrator_config = OrchestratorConfig(
        room_url=room_url,
        token=token,
        bot_name="Simple Bot",
        expiration=expiration,
    )

    logging.basicConfig(format="%(levelno)s %(asctime)s %(message)s")
    logger: logging.Logger = logging.getLogger("dailyai")
    logger.setLevel(logging.DEBUG)

    orchestrator = Orchestrator(
        orchestrator_config,
        services,
        message_handler,
        sprite_conversation_processors
    )

    try:
        orchestrator.start()
    finally:
        # When the orchestrator's done, we need to shut it down, and the
        # various services and handlers we've created. Doing this in a
        # `finally` keeps the cleanup from being skipped when start() raises
        # or is interrupted.
        orchestrator.stop()
        message_handler.shutdown()

        services.tts.close()
        services.image.close()
        services.llm.close()
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: create a meeting token for the room given on
    # the command line, then run the bot in that room.
    parser = argparse.ArgumentParser(description="Simple Daily Bot Sample")
    parser.add_argument("-u", "--url", type=str, required=True, help="URL of the Daily room")
    parser.add_argument(
        "-k", "--apikey", type=str, required=True, help="Daily API Key (needed to create token)"
    )

    args: argparse.Namespace = parser.parse_args()

    # Create a meeting token for the given room with an expiration 1 hour in the future.
    # Strip slashes on both ends so a trailing "/" in the URL does not become
    # part of the room name.
    room_name: str = urllib.parse.urlparse(args.url).path.strip("/")
    expiration: float = time.time() + 60 * 60

    res: requests.Response = requests.post(
        "https://api.daily.co/v1/meeting-tokens",
        headers={"Authorization": f"Bearer {args.apikey}"},
        json={
            "properties": {"room_name": room_name, "is_owner": True, "exp": expiration}
        },
        # Without a timeout, requests waits indefinitely on an unresponsive server.
        timeout=30,
    )

    if res.status_code != 200:
        raise Exception(f'Failed to create meeting token: {res.status_code} {res.text}')

    token: str = res.json()['token']

    add_bot_to_room(args.url, token, expiration)
|