diff --git a/changelog/XXX.added.md b/changelog/XXX.added.md
new file mode 100644
index 000000000..6bbcd038c
--- /dev/null
+++ b/changelog/XXX.added.md
@@ -0,0 +1 @@
+- Added approximate TTFB (time to first byte) metrics for Ultravox.
diff --git a/src/pipecat/services/ultravox/llm.py b/src/pipecat/services/ultravox/llm.py
index 06e62c052..71dcf4cf7 100644
--- a/src/pipecat/services/ultravox/llm.py
+++ b/src/pipecat/services/ultravox/llm.py
@@ -43,6 +43,7 @@ from pipecat.frames.frames import (
     TTSStoppedFrame,
     TTSTextFrame,
     UserAudioRawFrame,
+    UserStoppedSpeakingFrame,
 )
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response import (
@@ -340,6 +341,13 @@ class UltravoxRealtimeLLMService(LLMService):
         elif isinstance(frame, InputAudioRawFrame):
             await self._send_user_audio(frame)
             await self.push_frame(frame, direction)
+        elif isinstance(frame, UserStoppedSpeakingFrame):
+            # This may or may not align with Ultravox's own end-of-user-speech
+            # detection, which relies on a more complex endpointing model. In
+            # particular, an endpointing false negative will yield a seemingly very
+            # slow TTFB. In the majority of cases, though, the two will be close.
+            await self.start_ttfb_metrics()
+            await self.push_frame(frame, direction)
         else:
             await self.push_frame(frame, direction)
 
@@ -462,6 +470,7 @@ class UltravoxRealtimeLLMService(LLMService):
         if not audio:
             return
         if not self._bot_responding:
+            await self.stop_ttfb_metrics()
             await self.push_frame(LLMFullResponseStartFrame())
             await self.push_frame(TTSStartedFrame())
             self._bot_responding = "voice"
@@ -507,6 +516,7 @@ class UltravoxRealtimeLLMService(LLMService):
             await self.push_frame(frame)
         if medium == "text":
             if text:
+                await self.stop_ttfb_metrics()
                 await self.push_frame(LLMFullResponseStartFrame())
                 await self.push_frame(TTSStartedFrame())
                 await self.push_frame(TTSTextFrame(text=text, aggregated_by=AggregationType.WORD))