From 1e3fa4a9c7fa94aa4957909dffc0d9c15d5a4b9d Mon Sep 17 00:00:00 2001 From: Kwindla Hultman Kramer Date: Sat, 14 Jun 2025 17:41:44 -0400 Subject: [PATCH 1/2] fix groq wav file header parsing --- src/pipecat/services/groq/tts.py | 38 ++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/src/pipecat/services/groq/tts.py b/src/pipecat/services/groq/tts.py index 6f73b1629..33fd3ce34 100644 --- a/src/pipecat/services/groq/tts.py +++ b/src/pipecat/services/groq/tts.py @@ -4,6 +4,8 @@ # SPDX-License-Identifier: BSD 2-Clause License # +import io +import wave from typing import AsyncGenerator, Optional from loguru import logger @@ -78,22 +80,26 @@ class GroqTTSService(TTSService): await self.start_ttfb_metrics() yield TTSStartedFrame() - response = await self._client.audio.speech.create( - model=self._model_name, - voice=self._voice_id, - response_format=self._output_format, - input=text, - ) + try: + response = await self._client.audio.speech.create( + model=self._model_name, + voice=self._voice_id, + response_format=self._output_format, + input=text, + ) - async for data in response.iter_bytes(): - if measuring_ttfb: - await self.stop_ttfb_metrics() - measuring_ttfb = False - # remove wav header if present - if data.startswith(b"RIFF"): - data = data[44:] - if len(data) == 0: - continue - yield TTSAudioRawFrame(data, self.sample_rate, 1) + async for data in response.iter_bytes(): + if measuring_ttfb: + await self.stop_ttfb_metrics() + measuring_ttfb = False + + with wave.open(io.BytesIO(data)) as w: + channels = w.getnchannels() + frame_rate = w.getframerate() + num_frames = w.getnframes() + bytes = w.readframes(num_frames) + yield TTSAudioRawFrame(bytes, frame_rate, channels) + except Exception as e: + logger.error(f"{self} exception: {e}") yield TTSStoppedFrame() From fe16ed3c73882c42e1cbbc1020757cad15112bc0 Mon Sep 17 00:00:00 2001 From: Kwindla Hultman Kramer Date: Sun, 15 Jun 2025 10:49:40 -0700 Subject: [PATCH 2/2] added changelog entry --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c8648272..cfbeb5010 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Fixed an issue with `GroqTTSService` where it was not properly parsing the + WAV file header. + - Fixed an issue with `GoogleSTTService` where it was constantly reconnecting before starting to receive audio from the user.