Added time-to-first-byte (TTFB) debug logs to the Cartesia, Deepgram, and ElevenLabs TTS services

This commit is contained in:
Kwindla Hultman Kramer
2024-06-05 13:33:02 -04:00
parent 2d1ed9a304
commit 06ff9cfede
3 changed files with 18 additions and 0 deletions

View File

@@ -3,6 +3,7 @@
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import time
from cartesia.tts import AsyncCartesiaTTS
@@ -40,6 +41,8 @@ class CartesiaTTSService(TTSService):
logger.error(f"Cartesia initialization error: {e}")
async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
start_time = time.time()
ttfb = None
logger.debug(f"Generating TTS: [{text}]")
try:
@@ -52,6 +55,9 @@ class CartesiaTTSService(TTSService):
)
async for chunk in chunk_generator:
if ttfb is None:
ttfb = time.time() - start_time
logger.debug(f"TTS ttfb: {ttfb}")
yield AudioRawFrame(chunk["audio"], chunk["sampling_rate"], 1)
except Exception as e:
logger.error(f"Cartesia exception: {e}")

View File

@@ -5,6 +5,7 @@
#
import aiohttp
import time
from typing import AsyncGenerator
@@ -30,6 +31,8 @@ class DeepgramTTSService(TTSService):
self._aiohttp_session = aiohttp_session
async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
start_time = time.time()
ttfb = None
logger.debug(f"Generating TTS: [{text}]")
base_url = "https://api.deepgram.com/v1/speak"
@@ -46,6 +49,9 @@ class DeepgramTTSService(TTSService):
return
async for data in r.content:
if ttfb is None:
ttfb = time.time() - start_time
logger.debug(f"TTS ttfb: {ttfb}")
frame = AudioRawFrame(audio=data, sample_rate=16000, num_channels=1)
yield frame
except Exception as e:

View File

@@ -5,6 +5,7 @@
#
import aiohttp
import time
from typing import AsyncGenerator
@@ -32,6 +33,8 @@ class ElevenLabsTTSService(TTSService):
self._model = model
async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
start_time = time.time()
ttfb = None
logger.debug(f"Generating TTS: [{text}]")
url = f"https://api.elevenlabs.io/v1/text-to-speech/{self._voice_id}/stream"
@@ -56,5 +59,8 @@ class ElevenLabsTTSService(TTSService):
async for chunk in r.content:
if len(chunk) > 0:
if ttfb is None:
ttfb = time.time() - start_time
logger.debug(f"TTS ttfb: {ttfb}")
frame = AudioRawFrame(chunk, 16000, 1)
yield frame