@@ -25,20 +25,21 @@ from dailyai.services.openai_api_llm_service import BaseOpenAILLMService
|
||||
|
||||
|
||||
class AzureTTSService(TTSService):
|
||||
def __init__(self, *, api_key, region):
|
||||
def __init__(self, *, api_key, region, voice="en-US-SaraNeural"):
|
||||
super().__init__()
|
||||
|
||||
self.speech_config = SpeechConfig(subscription=api_key, region=region)
|
||||
self.speech_synthesizer = SpeechSynthesizer(
|
||||
speech_config=self.speech_config, audio_config=None
|
||||
)
|
||||
self._voice = voice
|
||||
|
||||
async def run_tts(self, sentence) -> AsyncGenerator[bytes, None]:
|
||||
self.logger.info("Running azure tts")
|
||||
ssml = (
|
||||
"<speak version='1.0' xml:lang='en-US' xmlns='http://www.w3.org/2001/10/synthesis' "
|
||||
"xmlns:mstts='http://www.w3.org/2001/mstts'>"
|
||||
"<voice name='en-US-SaraNeural'>"
|
||||
f"<voice name='{self._voice}'>"
|
||||
"<mstts:silence type='Sentenceboundary' value='20ms' />"
|
||||
"<mstts:express-as style='lyrical' styledegree='2' role='SeniorFemale'>"
|
||||
"<prosody rate='1.05'>"
|
||||
|
||||
@@ -16,16 +16,18 @@ class ElevenLabsTTSService(TTSService):
|
||||
aiohttp_session: aiohttp.ClientSession,
|
||||
api_key,
|
||||
voice_id,
|
||||
model="eleven_turbo_v2",
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
self._api_key = api_key
|
||||
self._voice_id = voice_id
|
||||
self._aiohttp_session = aiohttp_session
|
||||
self._model = model
|
||||
|
||||
async def run_tts(self, sentence) -> AsyncGenerator[bytes, None]:
|
||||
url = f"https://api.elevenlabs.io/v1/text-to-speech/{self._voice_id}/stream"
|
||||
payload = {"text": sentence, "model_id": "eleven_turbo_v2"}
|
||||
payload = {"text": sentence, "model_id": self._model}
|
||||
querystring = {"output_format": "pcm_16000", "optimize_streaming_latency": 2}
|
||||
headers = {
|
||||
"xi-api-key": self._api_key,
|
||||
|
||||
84
src/examples/starter-apps/translator.py
Normal file
84
src/examples/starter-apps/translator.py
Normal file
@@ -0,0 +1,84 @@
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import logging
|
||||
import os
|
||||
from PIL import Image
|
||||
from typing import AsyncGenerator
|
||||
|
||||
from dailyai.pipeline.aggregators import (
|
||||
LLMResponseAggregator,
|
||||
UserResponseAggregator,
|
||||
SentenceAggregator,
|
||||
)
|
||||
from dailyai.pipeline.frames import Frame, LLMMessagesQueueFrame, TextFrame
|
||||
from dailyai.pipeline.frame_processor import FrameProcessor
|
||||
from dailyai.services.ai_services import AIService, FrameLogger
|
||||
from dailyai.pipeline.pipeline import Pipeline
|
||||
from dailyai.services.daily_transport_service import DailyTransportService
|
||||
from dailyai.services.azure_ai_services import AzureTTSService
|
||||
from dailyai.services.open_ai_services import OpenAILLMService
|
||||
from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
|
||||
from examples.support.runner import configure
|
||||
|
||||
logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s")
|
||||
logger = logging.getLogger("dailyai")
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
"""
|
||||
This example looks a bit different than the chatbot example, because it isn't waiting on the user to stop talking to start translating.
|
||||
It also isn't saving what the user or bot says into the context object for use in subsequent interactions.
|
||||
"""
|
||||
|
||||
|
||||
# We need to use a custom service here to yield LLM frames without saving any context
|
||||
class TranslationProcessor(FrameProcessor):
|
||||
def __init__(self, language):
|
||||
self._language = language
|
||||
|
||||
async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
|
||||
if isinstance(frame, TextFrame):
|
||||
context = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": f"You will be provided with a sentence in English, and your task is to translate it into {self._language}.",
|
||||
},
|
||||
{"role": "user", "content": frame.text},
|
||||
]
|
||||
yield LLMMessagesQueueFrame(context)
|
||||
else:
|
||||
yield frame
|
||||
|
||||
|
||||
async def main(room_url: str, token):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
transport = DailyTransportService(
|
||||
room_url,
|
||||
token,
|
||||
"Translator",
|
||||
duration_minutes=5,
|
||||
start_transcription=True,
|
||||
mic_enabled=True,
|
||||
mic_sample_rate=16000,
|
||||
camera_enabled=False,
|
||||
vad_enabled=True,
|
||||
)
|
||||
tts = AzureTTSService(
|
||||
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
|
||||
region=os.getenv("AZURE_SPEECH_REGION"),
|
||||
voice="es-ES-AlvaroNeural",
|
||||
)
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview"
|
||||
)
|
||||
sa = SentenceAggregator()
|
||||
tp = TranslationProcessor("Spanish")
|
||||
pipeline = Pipeline([sa, tp, llm, tts])
|
||||
|
||||
transport.transcription_settings["extra"]["endpointing"] = True
|
||||
transport.transcription_settings["extra"]["punctuate"] = True
|
||||
await transport.run(pipeline)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
(url, token) = configure()
|
||||
asyncio.run(main(url, token))
|
||||
Reference in New Issue
Block a user