From 4bafdaa04d2aa86105e05ee6a6cd2741d96a3532 Mon Sep 17 00:00:00 2001 From: Ankur Duggal <38927181+ankykong@users.noreply.github.com> Date: Thu, 25 Jul 2024 09:51:51 -0700 Subject: [PATCH] Deepgram Adjustments (#313) --- CHANGELOG.md | 2 ++ src/pipecat/services/deepgram.py | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 37ef909fe..63532b463 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -137,6 +137,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added missing * keyword separators in services. +- `DeepgramTTSService` is now more customizable. You can adjust the encoding and sample rate. + ### Fixed - `WebsocketServerTransport` doesn't try to send frames anymore if serializers diff --git a/src/pipecat/services/deepgram.py b/src/pipecat/services/deepgram.py index e3a9ee478..f582664c3 100644 --- a/src/pipecat/services/deepgram.py +++ b/src/pipecat/services/deepgram.py @@ -49,6 +49,8 @@ class DeepgramTTSService(TTSService): api_key: str, voice: str = "aura-helios-en", base_url: str = "https://api.deepgram.com/v1/speak", + sample_rate: int = 16000, + encoding: str = "linear16", **kwargs): super().__init__(**kwargs) @@ -56,6 +58,8 @@ class DeepgramTTSService(TTSService): self._api_key = api_key self._aiohttp_session = aiohttp_session self._base_url = base_url + self._sample_rate = sample_rate + self._encoding = encoding def can_generate_metrics(self) -> bool: return True @@ -68,7 +72,7 @@ class DeepgramTTSService(TTSService): logger.debug(f"Generating TTS: [{text}]") base_url = self._base_url - request_url = f"{base_url}?model={self._voice}&encoding=linear16&container=none&sample_rate=16000" + request_url = f"{base_url}?model={self._voice}&encoding={self._encoding}&container=none&sample_rate={self._sample_rate}" headers = {"authorization": f"token {self._api_key}"} body = {"text": text} @@ -91,7 +95,7 @@ class DeepgramTTSService(TTSService): async for data in r.content: await
self.stop_ttfb_metrics() - frame = AudioRawFrame(audio=data, sample_rate=16000, num_channels=1) + frame = AudioRawFrame(audio=data, sample_rate=self._sample_rate, num_channels=1) yield frame except Exception as e: logger.exception(f"{self} exception: {e}")