Merge pull request #2008 from pipecat-ai/khk/groq-audio

Fix Groq WAV file header parsing
This commit is contained in:
Jon Taylor
2025-06-16 14:09:09 +01:00
committed by GitHub
2 changed files with 25 additions and 16 deletions

View File

@@ -9,6 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
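- Fixed an issue with `GroqTTSService` where it was not properly parsing the
WAV file header: the header is now read with the `wave` module instead of
assuming a fixed 44-byte header, and the emitted audio frames use the sample
rate and channel count declared in that header.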
- Fixed an issue with `GoogleSTTService` where it was constantly reconnecting
before starting to receive audio from the user.

View File

@@ -4,6 +4,8 @@
# SPDX-License-Identifier: BSD 2-Clause License
#
import io
import wave
from typing import AsyncGenerator, Optional
from loguru import logger
@@ -78,22 +80,26 @@ class GroqTTSService(TTSService):
await self.start_ttfb_metrics()
yield TTSStartedFrame()
response = await self._client.audio.speech.create(
model=self._model_name,
voice=self._voice_id,
response_format=self._output_format,
input=text,
)
try:
response = await self._client.audio.speech.create(
model=self._model_name,
voice=self._voice_id,
response_format=self._output_format,
input=text,
)
async for data in response.iter_bytes():
if measuring_ttfb:
await self.stop_ttfb_metrics()
measuring_ttfb = False
# remove wav header if present
if data.startswith(b"RIFF"):
data = data[44:]
if len(data) == 0:
continue
yield TTSAudioRawFrame(data, self.sample_rate, 1)
async for data in response.iter_bytes():
if measuring_ttfb:
await self.stop_ttfb_metrics()
measuring_ttfb = False
with wave.open(io.BytesIO(data)) as w:
channels = w.getnchannels()
frame_rate = w.getframerate()
num_frames = w.getnframes()
bytes = w.readframes(num_frames)
yield TTSAudioRawFrame(bytes, frame_rate, channels)
except Exception as e:
logger.error(f"{self} exception: {e}")
yield TTSStoppedFrame()