diff --git a/src/pipecat/services/azure/tts.py b/src/pipecat/services/azure/tts.py index dec047d81..3c649cf42 100644 --- a/src/pipecat/services/azure/tts.py +++ b/src/pipecat/services/azure/tts.py @@ -441,9 +441,9 @@ class AzureTTSService(WordTTSService, AzureBaseTTSService): try: if not self._started: await self.start_ttfb_metrics() - await self.start_word_timestamps() yield TTSStartedFrame() self._started = True + self._first_chunk = True self._cumulative_audio_offset = 0.0 ssml = self._construct_ssml(text) @@ -457,6 +457,12 @@ class AzureTTSService(WordTTSService, AzureBaseTTSService): break await self.stop_ttfb_metrics() + + # Start word timestamps when first chunk arrives + if self._first_chunk: + await self.start_word_timestamps() + self._first_chunk = False + frame = TTSAudioRawFrame( audio=chunk, sample_rate=self.sample_rate,