diff --git a/examples/foundational/07q-interruptible-rime-http.py b/examples/foundational/07q-interruptible-rime-http.py
index 19d032413..40fc6be5f 100644
--- a/examples/foundational/07q-interruptible-rime-http.py
+++ b/examples/foundational/07q-interruptible-rime-http.py
@@ -44,7 +44,8 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac
 
         tts = RimeHttpTTSService(
             api_key=os.getenv("RIME_API_KEY", ""),
-            voice_id="rex",
+            voice_id="luna",
+            model="arcana",
             aiohttp_session=session,
         )
 
diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py
index 57ade7484..a26a54805 100644
--- a/src/pipecat/services/rime/tts.py
+++ b/src/pipecat/services/rime/tts.py
@@ -400,6 +400,14 @@ class RimeHttpTTSService(TTSService):
         payload["modelId"] = self._model_name
         payload["samplingRate"] = self.sample_rate
 
+        # Arcana does not support PCM audio, so ask for WAV and strip the RIFF
+        # header from the stream before emitting raw audio frames.
+        need_to_strip_wav_header = payload["modelId"] == "arcana"
+        if need_to_strip_wav_header:
+            headers["Accept"] = "audio/wav"
+        # Collects the start of the response until the whole header has arrived.
+        wav_header = bytearray()
+
         try:
             await self.start_ttfb_metrics()
 
@@ -420,6 +428,23 @@ class RimeHttpTTSService(TTSService):
                 CHUNK_SIZE = 1024
 
                 async for chunk in response.content.iter_chunked(CHUNK_SIZE):
+                    if need_to_strip_wav_header:
+                        # A network chunk may be shorter than CHUNK_SIZE and a WAV
+                        # header is not always 44 bytes, so buffer until the "data"
+                        # sub-chunk (4-byte ID + 4-byte size) has fully arrived.
+                        wav_header += chunk
+                        if len(wav_header) < 4:
+                            continue
+                        if wav_header.startswith(b"RIFF"):
+                            data_pos = wav_header.find(b"data", 12)
+                            if data_pos < 0 or len(wav_header) < data_pos + 8:
+                                continue
+                            chunk = bytes(wav_header[data_pos + 8 :])
+                        else:
+                            # Not a RIFF container: pass the bytes through untouched.
+                            chunk = bytes(wav_header)
+                        need_to_strip_wav_header = False
+
                     if len(chunk) > 0:
                         await self.stop_ttfb_metrics()
                         frame = TTSAudioRawFrame(chunk, self.sample_rate, 1)