Compare commits
3 Commits
hush/realt
...
cleanup
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
646db8b9bd | ||
|
|
42c142aff0 | ||
|
|
6da78dbf9c |
@@ -5,7 +5,6 @@ import json
|
|||||||
from openai import AsyncAzureOpenAI
|
from openai import AsyncAzureOpenAI
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import requests
|
|
||||||
|
|
||||||
from collections.abc import AsyncGenerator
|
from collections.abc import AsyncGenerator
|
||||||
|
|
||||||
@@ -16,7 +15,10 @@ from PIL import Image
|
|||||||
from azure.cognitiveservices.speech import SpeechSynthesizer, SpeechConfig, ResultReason, CancellationReason
|
from azure.cognitiveservices.speech import SpeechSynthesizer, SpeechConfig, ResultReason, CancellationReason
|
||||||
|
|
||||||
class AzureTTSService(TTSService):
|
class AzureTTSService(TTSService):
|
||||||
def __init__(self, speech_key=None, speech_region=None):
|
|
||||||
|
def __init__(
|
||||||
|
self, speech_key=None, speech_region=None, voice_name="en-US-SaraNeural"
|
||||||
|
):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
speech_key = speech_key or os.getenv("AZURE_SPEECH_SERVICE_KEY")
|
speech_key = speech_key or os.getenv("AZURE_SPEECH_SERVICE_KEY")
|
||||||
@@ -25,11 +27,13 @@ class AzureTTSService(TTSService):
|
|||||||
self.speech_config = SpeechConfig(subscription=speech_key, region=speech_region)
|
self.speech_config = SpeechConfig(subscription=speech_key, region=speech_region)
|
||||||
self.speech_synthesizer = SpeechSynthesizer(speech_config=self.speech_config, audio_config=None)
|
self.speech_synthesizer = SpeechSynthesizer(speech_config=self.speech_config, audio_config=None)
|
||||||
|
|
||||||
|
self.voice_name = voice_name
|
||||||
|
|
||||||
async def run_tts(self, sentence) -> AsyncGenerator[bytes, None]:
|
async def run_tts(self, sentence) -> AsyncGenerator[bytes, None]:
|
||||||
self.logger.info("Running azure tts")
|
self.logger.info("Running azure tts")
|
||||||
ssml = "<speak version='1.0' xml:lang='en-US' xmlns='http://www.w3.org/2001/10/synthesis' " \
|
ssml = f"<speak version='1.0' xml:lang='en-US' xmlns='http://www.w3.org/2001/10/synthesis' " \
|
||||||
"xmlns:mstts='http://www.w3.org/2001/mstts'>" \
|
"xmlns:mstts='http://www.w3.org/2001/mstts'>" \
|
||||||
"<voice name='en-US-SaraNeural'>" \
|
f"<voice name={self.voice_name}>" \
|
||||||
"<mstts:silence type='Sentenceboundary' value='20ms' />" \
|
"<mstts:silence type='Sentenceboundary' value='20ms' />" \
|
||||||
"<mstts:express-as style='lyrical' styledegree='2' role='SeniorFemale'>" \
|
"<mstts:express-as style='lyrical' styledegree='2' role='SeniorFemale'>" \
|
||||||
"<prosody rate='1.05'>" \
|
"<prosody rate='1.05'>" \
|
||||||
@@ -92,81 +96,58 @@ class AzureLLMService(LLMService):
|
|||||||
|
|
||||||
class AzureImageGenServiceREST(ImageGenService):
|
class AzureImageGenServiceREST(ImageGenService):
|
||||||
|
|
||||||
def __init__(self, image_size:str, api_key=None, azure_endpoint=None, api_version=None, model=None):
|
def __init__(
|
||||||
|
self,
|
||||||
|
image_size: str,
|
||||||
|
api_key: str | None = None,
|
||||||
|
azure_endpoint: str | None = None,
|
||||||
|
api_version: str | None = None,
|
||||||
|
model: str | None = None,
|
||||||
|
aiohttp_session: aiohttp.ClientSession | None=None,
|
||||||
|
timeout_seconds=120,
|
||||||
|
):
|
||||||
super().__init__(image_size=image_size)
|
super().__init__(image_size=image_size)
|
||||||
self.api_key = api_key or os.getenv("AZURE_DALLE_KEY")
|
self.api_key = api_key or os.getenv("AZURE_DALLE_KEY")
|
||||||
self.azure_endpoint = azure_endpoint or os.getenv("AZURE_DALLE_ENDPOINT")
|
self.azure_endpoint = azure_endpoint or os.getenv("AZURE_DALLE_ENDPOINT")
|
||||||
self.api_version = api_version or "2023-06-01-preview"
|
self.api_version = api_version or "2023-06-01-preview"
|
||||||
self.model = model or os.getenv("AZURE_DALLE_DEPLOYMENT_ID")
|
self.model = model or os.getenv("AZURE_DALLE_DEPLOYMENT_ID")
|
||||||
|
self.aiohttp_session: aiohttp.ClientSession = (
|
||||||
|
aiohttp_session or aiohttp.ClientSession()
|
||||||
|
)
|
||||||
|
self.timeout_seconds = timeout_seconds
|
||||||
|
|
||||||
async def run_image_gen(self, sentence) -> tuple[str, bytes]:
|
async def run_image_gen(self, sentence) -> tuple[str, bytes]:
|
||||||
# TODO hoist the session to app-level
|
url = f"{self.azure_endpoint}openai/images/generations:submit?api-version={self.api_version}"
|
||||||
async with aiohttp.ClientSession() as session:
|
headers= { "api-key": self.api_key, "Content-Type": "application/json" }
|
||||||
url = f"{self.azure_endpoint}openai/images/generations:submit?api-version={self.api_version}"
|
body = {
|
||||||
headers= { "api-key": self.api_key, "Content-Type": "application/json" }
|
"prompt": sentence,
|
||||||
body = {
|
"size": self.image_size,
|
||||||
# Enter your prompt text here
|
"n": 1,
|
||||||
"prompt": sentence,
|
}
|
||||||
"size": self.image_size,
|
async with self.aiohttp_session.post(
|
||||||
"n": 1,
|
url, headers=headers, json=body
|
||||||
}
|
) as submission:
|
||||||
async with session.post(url, headers=headers, json=body) as submission:
|
operation_location = submission.headers['operation-location']
|
||||||
operation_location = submission.headers['operation-location']
|
|
||||||
|
|
||||||
status = ""
|
status = ""
|
||||||
attempts_left = 120
|
attempts_left = self.timeout_seconds
|
||||||
json_response = None
|
json_response = None
|
||||||
while status != "succeeded":
|
while status != "succeeded":
|
||||||
attempts_left -= 1
|
attempts_left -= 1
|
||||||
if attempts_left == 0:
|
if attempts_left == 0:
|
||||||
raise Exception("Image generation timed out")
|
raise Exception("Image generation timed out")
|
||||||
|
|
||||||
await asyncio.sleep(1)
|
await asyncio.sleep(1)
|
||||||
response = await session.get(operation_location, headers=headers)
|
response = await self.aiohttp_session.get(operation_location, headers=headers)
|
||||||
json_response = await response.json()
|
json_response = await response.json()
|
||||||
status = json_response["status"]
|
status = json_response["status"]
|
||||||
|
|
||||||
image_url = json_response["result"]["data"][0]["url"] if json_response else None
|
image_url = json_response["result"]["data"][0]["url"] if json_response else None
|
||||||
if not image_url:
|
if not image_url:
|
||||||
raise Exception("Image generation failed")
|
raise Exception("Image generation failed")
|
||||||
|
|
||||||
# Load the image from the url
|
# Load the image from the url
|
||||||
async with session.get(image_url) as response:
|
async with self.aiohttp_session.get(image_url) as response:
|
||||||
image_stream = io.BytesIO(await response.content.read())
|
image_stream = io.BytesIO(await response.content.read())
|
||||||
image = Image.open(image_stream)
|
image = Image.open(image_stream)
|
||||||
return (image_url, image.tobytes())
|
return (image_url, image.tobytes())
|
||||||
|
|
||||||
|
|
||||||
class AzureImageGenService(ImageGenService):
|
|
||||||
|
|
||||||
def __init__(self, api_key=None, azure_endpoint=None, api_version=None, model=None):
|
|
||||||
super().__init__()
|
|
||||||
|
|
||||||
api_key = api_key or os.getenv("AZURE_DALLE_KEY")
|
|
||||||
azure_endpoint = azure_endpoint or os.getenv("AZURE_DALLE_ENDPOINT")
|
|
||||||
api_version = api_version or "2023-06-01-preview"
|
|
||||||
self.model = model or os.getenv("AZURE_DALLE_DEPLOYMENT_ID")
|
|
||||||
|
|
||||||
self.client = AzureOpenAI(
|
|
||||||
api_key=api_key,
|
|
||||||
azure_endpoint=azure_endpoint,
|
|
||||||
api_version=api_version,
|
|
||||||
)
|
|
||||||
|
|
||||||
async def run_image_gen(self, sentence) -> tuple[str, bytes]:
|
|
||||||
self.logger.info("Generating azure image", sentence)
|
|
||||||
|
|
||||||
image = self.client.images.generate(
|
|
||||||
model=self.model,
|
|
||||||
prompt=sentence,
|
|
||||||
n=1,
|
|
||||||
size=self.image_size,
|
|
||||||
)
|
|
||||||
|
|
||||||
url = image["data"][0]["url"]
|
|
||||||
response = requests.get(url)
|
|
||||||
|
|
||||||
dalle_stream = io.BytesIO(response.content)
|
|
||||||
dalle_im = Image.open(dalle_stream.tobytes())
|
|
||||||
|
|
||||||
return (url, dalle_im)
|
|
||||||
|
|||||||
@@ -9,28 +9,30 @@ from dailyai.services.ai_services import TTSService
|
|||||||
|
|
||||||
|
|
||||||
class ElevenLabsTTSService(TTSService):
|
class ElevenLabsTTSService(TTSService):
|
||||||
def __init__(self, api_key=None, voice_id=None):
|
def __init__(self, api_key=None, voice_id=None, aiohttp_session:aiohttp.ClientSession=None):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
self.api_key = api_key or os.getenv("ELEVENLABS_API_KEY")
|
self.api_key = api_key or os.getenv("ELEVENLABS_API_KEY")
|
||||||
self.voice_id = voice_id or os.getenv("ELEVENLABS_VOICE_ID")
|
self.voice_id = voice_id or os.getenv("ELEVENLABS_VOICE_ID")
|
||||||
|
self.aiohttp_session = aiohttp_session or aiohttp.ClientSession()
|
||||||
|
|
||||||
async def run_tts(self, sentence) -> AsyncGenerator[bytes, None]:
|
async def run_tts(self, sentence) -> AsyncGenerator[bytes, None]:
|
||||||
async with aiohttp.ClientSession() as session:
|
url = f"https://api.elevenlabs.io/v1/text-to-speech/{self.voice_id}/stream"
|
||||||
url = f"https://api.elevenlabs.io/v1/text-to-speech/{self.voice_id}/stream"
|
payload = {"text": sentence, "model_id": "eleven_turbo_v2"}
|
||||||
payload = {"text": sentence, "model_id": "eleven_turbo_v2"}
|
querystring = {"output_format": "pcm_16000", "optimize_streaming_latency": 2}
|
||||||
querystring = {"output_format": "pcm_16000", "optimize_streaming_latency": 2}
|
headers = {
|
||||||
headers = {
|
"xi-api-key": self.api_key,
|
||||||
"xi-api-key": self.api_key,
|
"Content-Type": "application/json",
|
||||||
"Content-Type": "application/json",
|
}
|
||||||
}
|
async with self.aiohttp_session.post(
|
||||||
async with session.post(url, json=payload, headers=headers, params=querystring) as r:
|
url, json=payload, headers=headers, params=querystring
|
||||||
if r.status != 200:
|
) as r:
|
||||||
self.logger.error(
|
if r.status != 200:
|
||||||
f"audio fetch status code: {r.status}, error: {r.text}"
|
self.logger.error(
|
||||||
)
|
f"audio fetch status code: {r.status}, error: {r.text}"
|
||||||
return
|
)
|
||||||
|
return
|
||||||
|
|
||||||
async for chunk in r.content:
|
async for chunk in r.content:
|
||||||
if chunk:
|
if chunk:
|
||||||
yield chunk
|
yield chunk
|
||||||
|
|||||||
@@ -34,9 +34,9 @@ class FalImageGenService(ImageGenService):
|
|||||||
raise Exception("Image generation failed")
|
raise Exception("Image generation failed")
|
||||||
|
|
||||||
return image_url
|
return image_url
|
||||||
print(f"fetching image url...")
|
print("fetching image url...")
|
||||||
image_url = await asyncio.to_thread(get_image_url, sentence, self.image_size)
|
image_url = await asyncio.to_thread(get_image_url, sentence, self.image_size)
|
||||||
print(f"got image url, downloading image...")
|
print("got image url, downloading image...")
|
||||||
# Load the image from the url
|
# Load the image from the url
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession() as session:
|
||||||
async with session.get(image_url) as response:
|
async with session.get(image_url) as response:
|
||||||
|
|||||||
@@ -1,6 +1,4 @@
|
|||||||
import requests
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
import asyncio
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
import io
|
import io
|
||||||
from openai import AsyncOpenAI
|
from openai import AsyncOpenAI
|
||||||
@@ -9,7 +7,7 @@ import os
|
|||||||
import json
|
import json
|
||||||
from collections.abc import AsyncGenerator
|
from collections.abc import AsyncGenerator
|
||||||
|
|
||||||
from dailyai.services.ai_services import AIService, TTSService, LLMService, ImageGenService
|
from dailyai.services.ai_services import LLMService, ImageGenService
|
||||||
|
|
||||||
|
|
||||||
class OpenAILLMService(LLMService):
|
class OpenAILLMService(LLMService):
|
||||||
@@ -50,11 +48,19 @@ class OpenAILLMService(LLMService):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
class OpenAIImageGenService(ImageGenService):
|
class OpenAIImageGenService(ImageGenService):
|
||||||
def __init__(self, image_size:str, api_key=None, model=None):
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
image_size: str,
|
||||||
|
api_key=None,
|
||||||
|
model=None,
|
||||||
|
aiohttp_session: aiohttp.ClientSession | None = None,
|
||||||
|
):
|
||||||
super().__init__(image_size=image_size)
|
super().__init__(image_size=image_size)
|
||||||
api_key = api_key or os.getenv("OPEN_AI_KEY")
|
api_key = api_key or os.getenv("OPEN_AI_KEY")
|
||||||
self.model = model or os.getenv("OPEN_AI_IMAGE_MODEL") or "dall-e-3"
|
self.model = model or os.getenv("OPEN_AI_IMAGE_MODEL") or "dall-e-3"
|
||||||
self.client = AsyncOpenAI(api_key=api_key)
|
self.client = AsyncOpenAI(api_key=api_key)
|
||||||
|
self.aiohttp_session=aiohttp_session or aiohttp.ClientSession()
|
||||||
|
|
||||||
async def run_image_gen(self, sentence) -> tuple[str, bytes]:
|
async def run_image_gen(self, sentence) -> tuple[str, bytes]:
|
||||||
self.logger.info("Generating OpenAI image", sentence)
|
self.logger.info("Generating OpenAI image", sentence)
|
||||||
@@ -70,10 +76,7 @@ class OpenAIImageGenService(ImageGenService):
|
|||||||
raise Exception("No image provided in response", image)
|
raise Exception("No image provided in response", image)
|
||||||
|
|
||||||
# Load the image from the url
|
# Load the image from the url
|
||||||
async with aiohttp.ClientSession() as session:
|
async with self.aiohttp_session.get(image_url) as response:
|
||||||
async with session.get(image_url) as response:
|
image_stream = io.BytesIO(await response.content.read())
|
||||||
image_stream = io.BytesIO(await response.content.read())
|
image = Image.open(image_stream)
|
||||||
image = Image.open(image_stream)
|
return (image_url, image.tobytes())
|
||||||
return (image_url, image.tobytes())
|
|
||||||
|
|
||||||
return (image_url, dalle_im.tobytes())
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import asyncio
|
|||||||
from dailyai.queue_frame import TextQueueFrame
|
from dailyai.queue_frame import TextQueueFrame
|
||||||
from dailyai.services.daily_transport_service import DailyTransportService
|
from dailyai.services.daily_transport_service import DailyTransportService
|
||||||
from dailyai.services.open_ai_services import OpenAIImageGenService
|
from dailyai.services.open_ai_services import OpenAIImageGenService
|
||||||
|
from dailyai.services.azure_ai_services import AzureImageGenServiceREST
|
||||||
|
|
||||||
local_joined = False
|
local_joined = False
|
||||||
participant_joined = False
|
participant_joined = False
|
||||||
@@ -21,7 +22,7 @@ async def main(room_url):
|
|||||||
transport.camera_width = 1024
|
transport.camera_width = 1024
|
||||||
transport.camera_height = 1024
|
transport.camera_height = 1024
|
||||||
|
|
||||||
imagegen = OpenAIImageGenService(image_size="1024x1024")
|
imagegen = AzureImageGenServiceREST(image_size="1024x1024")
|
||||||
image_task = asyncio.create_task(
|
image_task = asyncio.create_task(
|
||||||
imagegen.run_to_queue(transport.send_queue, [TextQueueFrame("a cat in the style of picasso")])
|
imagegen.run_to_queue(transport.send_queue, [TextQueueFrame("a cat in the style of picasso")])
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import argparse
|
|||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
from dailyai.queue_frame import AudioQueueFrame, ImageQueueFrame
|
from dailyai.queue_frame import AudioQueueFrame, ImageQueueFrame
|
||||||
from dailyai.services.azure_ai_services import AzureLLMService
|
from dailyai.services.azure_ai_services import AzureImageGenServiceREST, AzureLLMService
|
||||||
from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
|
from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
|
||||||
from dailyai.services.daily_transport_service import DailyTransportService
|
from dailyai.services.daily_transport_service import DailyTransportService
|
||||||
from dailyai.services.fal_ai_services import FalImageGenService
|
from dailyai.services.fal_ai_services import FalImageGenService
|
||||||
@@ -22,9 +22,9 @@ async def main(room_url):
|
|||||||
transport.camera_height = 1024
|
transport.camera_height = 1024
|
||||||
|
|
||||||
llm = AzureLLMService()
|
llm = AzureLLMService()
|
||||||
dalle = FalImageGenService(image_size="1024x1024")
|
#dalle = FalImageGenService(image_size="1024x1024")
|
||||||
tts = ElevenLabsTTSService(voice_id="ErXwobaYiN019PkySvjV")
|
tts = ElevenLabsTTSService(voice_id="ErXwobaYiN019PkySvjV")
|
||||||
# dalle = OpenAIImageGenService(image_size="1024x1024")
|
dalle = AzureImageGenServiceREST(image_size="1024x1024")
|
||||||
|
|
||||||
# Get a complete audio chunk from the given text. Splitting this into its own
|
# Get a complete audio chunk from the given text. Splitting this into its own
|
||||||
# coroutine lets us ensure proper ordering of the audio chunks on the send queue.
|
# coroutine lets us ensure proper ordering of the audio chunks on the send queue.
|
||||||
|
|||||||
Reference in New Issue
Block a user