Add Tavus support for a custom output sample rate

2025-03-11 16:02:33 -07:00
parent 5bd359ada9
commit ecc4411128
1 changed files with 5 additions and 2 deletions
--- a/src/pipecat/services/tavus.py
+++ b/src/pipecat/services/tavus.py
@@ -37,6 +37,7 @@ class TavusVideoService(AIService):
        replica_id: str,
        persona_id: str = "pipecat0",  # Use `pipecat0` so that your TTS voice is used in place of the Tavus persona
        session: aiohttp.ClientSession,
+        output_sample_rate: int = 16000,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
@@ -44,6 +45,7 @@ class TavusVideoService(AIService):
        self._replica_id = replica_id
        self._persona_id = persona_id
        self._session = session
+        self._output_sample_rate = output_sample_rate

        self._conversation_id: str

@@ -94,7 +96,7 @@ class TavusVideoService(AIService):
    async def _encode_audio_and_send(self, audio: bytes, in_rate: int, done: bool) -> None:
        """Encodes audio to base64 and sends it to Tavus"""
        if not done:
-            audio = await self._resampler.resample(audio, in_rate, 16000)
+            audio = await self._resampler.resample(audio, in_rate, self._output_sample_rate)
        audio_base64 = base64.b64encode(audio).decode("utf-8")
        logger.trace(f"{self}: sending {len(audio)} bytes")
        await self._send_audio_message(audio_base64, done=done)
@@ -108,7 +110,7 @@ class TavusVideoService(AIService):
        elif isinstance(frame, TTSAudioRawFrame):
            await self._encode_audio_and_send(frame.audio, frame.sample_rate, done=False)
        elif isinstance(frame, TTSStoppedFrame):
-            await self._encode_audio_and_send(b"\x00", 16000, done=True)
+            await self._encode_audio_and_send(b"\x00", self._output_sample_rate, done=True)
            await self.stop_ttfb_metrics()
            await self.stop_processing_metrics()
        elif isinstance(frame, StartInterruptionFrame):
@@ -137,6 +139,7 @@ class TavusVideoService(AIService):
                    "inference_id": self._current_idx_str,
                    "audio": audio_base64,
                    "done": done,
+                    "sample_rate": self._output_sample_rate,
                },
            }
        )