diff --git a/src/pipecat/services/cartesia.py b/src/pipecat/services/cartesia.py index f2d8c9b14..9c730a283 100644 --- a/src/pipecat/services/cartesia.py +++ b/src/pipecat/services/cartesia.py @@ -3,6 +3,7 @@ # # SPDX-License-Identifier: BSD 2-Clause License # +import time from cartesia.tts import AsyncCartesiaTTS @@ -40,6 +41,8 @@ class CartesiaTTSService(TTSService): logger.error(f"Cartesia initialization error: {e}") async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]: + start_time = time.time() + ttfb = None logger.debug(f"Generating TTS: [{text}]") try: @@ -52,6 +55,9 @@ class CartesiaTTSService(TTSService): ) async for chunk in chunk_generator: + if ttfb is None: + ttfb = time.time() - start_time + logger.debug(f"TTS ttfb: {ttfb}") yield AudioRawFrame(chunk["audio"], chunk["sampling_rate"], 1) except Exception as e: logger.error(f"Cartesia exception: {e}") diff --git a/src/pipecat/services/deepgram.py b/src/pipecat/services/deepgram.py index b5901825e..7b19e04e2 100644 --- a/src/pipecat/services/deepgram.py +++ b/src/pipecat/services/deepgram.py @@ -5,6 +5,7 @@ # import aiohttp +import time from typing import AsyncGenerator @@ -30,6 +31,8 @@ class DeepgramTTSService(TTSService): self._aiohttp_session = aiohttp_session async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]: + start_time = time.time() + ttfb = None logger.debug(f"Generating TTS: [{text}]") base_url = "https://api.deepgram.com/v1/speak" @@ -46,6 +49,9 @@ class DeepgramTTSService(TTSService): return async for data in r.content: + if ttfb is None: + ttfb = time.time() - start_time + logger.debug(f"TTS ttfb: {ttfb}") frame = AudioRawFrame(audio=data, sample_rate=16000, num_channels=1) yield frame except Exception as e: diff --git a/src/pipecat/services/elevenlabs.py b/src/pipecat/services/elevenlabs.py index 3d602595d..d5b476160 100644 --- a/src/pipecat/services/elevenlabs.py +++ b/src/pipecat/services/elevenlabs.py @@ -5,6 +5,7 @@ # import aiohttp +import time from typing import AsyncGenerator @@ -32,6 +33,8 @@ class ElevenLabsTTSService(TTSService): self._model = model async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]: + start_time = time.time() + ttfb = None logger.debug(f"Generating TTS: [{text}]") url = f"https://api.elevenlabs.io/v1/text-to-speech/{self._voice_id}/stream" @@ -56,5 +59,8 @@ class ElevenLabsTTSService(TTSService): async for chunk in r.content: if len(chunk) > 0: + if ttfb is None: + ttfb = time.time() - start_time + logger.debug(f"TTS ttfb: {ttfb}") frame = AudioRawFrame(chunk, 16000, 1) yield frame