Add TTFB tracking for Ultravox

This commit is contained in:
Mike Depinet
2025-12-19 09:26:11 -08:00
parent 5e94b20562
commit 5ec08ff1d8
2 changed files with 11 additions and 0 deletions

1
changelog/XXX.added.md Normal file
View File

@@ -0,0 +1 @@
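- Added approximate TTFB (time to first byte) metrics for Ultravox, measured from the `UserStoppedSpeakingFrame` to the first audio or text of the bot's response.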

View File

@@ -43,6 +43,7 @@ from pipecat.frames.frames import (
TTSStoppedFrame,
TTSTextFrame,
UserAudioRawFrame,
UserStoppedSpeakingFrame,
)
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.aggregators.llm_response import (
@@ -340,6 +341,13 @@ class UltravoxRealtimeLLMService(LLMService):
elif isinstance(frame, InputAudioRawFrame):
await self._send_user_audio(frame)
await self.push_frame(frame, direction)
elif isinstance(frame, UserStoppedSpeakingFrame):
# Starting TTFB here may or may not align with Ultravox's own end-of-user-speech
# detection, which relies on a more complex endpointing model. In particular, it
# will yield a seemingly very slow TTFB in the case of endpointing false
# negatives. It will be close in the majority of cases, though.
await self.start_ttfb_metrics()
await self.push_frame(frame, direction)
else:
await self.push_frame(frame, direction)
@@ -462,6 +470,7 @@ class UltravoxRealtimeLLMService(LLMService):
if not audio:
return
if not self._bot_responding:
await self.stop_ttfb_metrics()
await self.push_frame(LLMFullResponseStartFrame())
await self.push_frame(TTSStartedFrame())
self._bot_responding = "voice"
@@ -507,6 +516,7 @@ class UltravoxRealtimeLLMService(LLMService):
await self.push_frame(frame)
if medium == "text":
if text:
await self.stop_ttfb_metrics()
await self.push_frame(LLMFullResponseStartFrame())
await self.push_frame(TTSStartedFrame())
await self.push_frame(TTSTextFrame(text=text, aggregated_by=AggregationType.WORD))