From 4bafdaa04d2aa86105e05ee6a6cd2741d96a3532 Mon Sep 17 00:00:00 2001
From: Ankur Duggal <38927181+ankykong@users.noreply.github.com>
Date: Thu, 25 Jul 2024 09:51:51 -0700
Subject: [PATCH] Deepgram Adjustments (#313)

---
 CHANGELOG.md                     | 2 ++
 src/pipecat/services/deepgram.py | 8 ++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 37ef909fe..63532b463 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -137,6 +137,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Added missing * keyword separators in services.
 
+- `DeepgramTTSService` is now more customizable: you can adjust the encoding and sample rate via the new `encoding` and `sample_rate` constructor arguments.
+
 ### Fixed
 
 - `WebsocketServerTransport` doesn't try to send frames anymore if serializers
diff --git a/src/pipecat/services/deepgram.py b/src/pipecat/services/deepgram.py
index e3a9ee478..f582664c3 100644
--- a/src/pipecat/services/deepgram.py
+++ b/src/pipecat/services/deepgram.py
@@ -49,6 +49,8 @@ class DeepgramTTSService(TTSService):
             api_key: str,
             voice: str = "aura-helios-en",
             base_url: str = "https://api.deepgram.com/v1/speak",
+            sample_rate: int = 16000,
+            encoding: str = "linear16",
             **kwargs):
         super().__init__(**kwargs)
 
@@ -56,6 +58,8 @@ class DeepgramTTSService(TTSService):
         self._api_key = api_key
         self._aiohttp_session = aiohttp_session
         self._base_url = base_url
+        self._sample_rate = sample_rate
+        self._encoding = encoding
 
     def can_generate_metrics(self) -> bool:
         return True
@@ -68,7 +72,7 @@ class DeepgramTTSService(TTSService):
         logger.debug(f"Generating TTS: [{text}]")
 
         base_url = self._base_url
-        request_url = f"{base_url}?model={self._voice}&encoding=linear16&container=none&sample_rate=16000"
+        request_url = f"{base_url}?model={self._voice}&encoding={self._encoding}&container=none&sample_rate={self._sample_rate}"
         headers = {"authorization": f"token {self._api_key}"}
         body = {"text": text}
 
@@ -91,7 +95,7 @@ class DeepgramTTSService(TTSService):
 
                 async for data in r.content:
                     await self.stop_ttfb_metrics()
-                    frame = AudioRawFrame(audio=data, sample_rate=16000, num_channels=1)
+                    frame = AudioRawFrame(audio=data, sample_rate=self._sample_rate, num_channels=1)
                     yield frame
         except Exception as e:
             logger.exception(f"{self} exception: {e}")