From 25ff8ef37bc3e83e417f7141345143bd16cf38a1 Mon Sep 17 00:00:00 2001
From: jqueguiner <jlqueguiner@gladia.io>
Date: Thu, 5 Jun 2025 16:51:29 -0700
Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20(config.py):=20add=20new=20configur?=
 =?UTF-8?q?ation=20options=20for=20lip-sync=20optimization,=20context=20ad?=
 =?UTF-8?q?aptation,=20and=20additional=20context=20to=20enhance=20transla?=
 =?UTF-8?q?tion=20accuracy=20=E2=99=BB=EF=B8=8F=20(stt.py):=20increase=20d?=
 =?UTF-8?q?efault=20max=20buffer=20size=20from=205MB=20to=2020MB=20to=20ac?=
 =?UTF-8?q?commodate=20larger=20audio=20data=20=E2=99=BB=EF=B8=8F=20(stt.p?=
 =?UTF-8?q?y):=20simplify=20audio=20sending=20logic=20by=20removing=20chun?=
 =?UTF-8?q?king=20and=20sending=20the=20entire=20buffered=20audio=20at=20o?=
 =?UTF-8?q?nce=20for=20improved=20performance?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/pipecat/services/gladia/config.py |  6 ++++++
 src/pipecat/services/gladia/stt.py    | 18 +++++-------------
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/src/pipecat/services/gladia/config.py b/src/pipecat/services/gladia/config.py
index 275554418..1e325686f 100644
--- a/src/pipecat/services/gladia/config.py
+++ b/src/pipecat/services/gladia/config.py
@@ -74,11 +74,17 @@ class TranslationConfig(BaseModel):
         target_languages: List of target language codes for translation
         model: Translation model to use ("base" or "enhanced")
         match_original_utterances: Whether to align translations with original utterances
+        lipsync: Whether to enable lip-sync optimization for translations
+        context_adaptation: Whether to enable context-aware translation adaptation
+        context: Additional context to help with translation accuracy
     """
 
     target_languages: Optional[List[str]] = None
     model: Optional[str] = None
     match_original_utterances: Optional[bool] = None
+    lipsync: Optional[bool] = None
+    context_adaptation: Optional[bool] = None
+    context: Optional[str] = None
 
 
 class RealtimeProcessingConfig(BaseModel):
diff --git a/src/pipecat/services/gladia/stt.py b/src/pipecat/services/gladia/stt.py
index b07fd0345..20eafc393 100644
--- a/src/pipecat/services/gladia/stt.py
+++ b/src/pipecat/services/gladia/stt.py
@@ -197,7 +197,7 @@ class GladiaSTTService(STTService):
         params: Optional[GladiaInputParams] = None,
         max_reconnection_attempts: int = 5,
         reconnection_delay: float = 1.0,
-        max_buffer_size: int = 1024 * 1024 * 5,  # 5MB default buffer
+        max_buffer_size: int = 1024 * 1024 * 20,  # 20MB default buffer
         **kwargs,
     ):
         """Initialize the Gladia STT service.
@@ -207,8 +207,7 @@ class GladiaSTTService(STTService):
             url: Gladia API URL
             confidence: Minimum confidence threshold for transcriptions
             sample_rate: Audio sample rate in Hz
-            model: Model to use ("solaria-1", "solaria-mini-1", "fast",
-                or "accurate")
+            model: Model to use ("solaria-1")
             params: Additional configuration parameters
             max_reconnection_attempts: Maximum number of reconnection attempts
             reconnection_delay: Initial delay between reconnection attempts (exponential backoff)
@@ -507,16 +506,9 @@ class GladiaSTTService(STTService):
     async def _send_buffered_audio(self):
         """Send any buffered audio after reconnection."""
         async with self._buffer_lock:
-            if self._bytes_sent < len(self._audio_buffer):
-                buffered_data = self._audio_buffer[self._bytes_sent :]
-                if buffered_data:
-                    logger.info(f"Sending {len(buffered_data)} bytes of buffered audio")
-                    # Send in chunks to avoid overwhelming the connection
-                    chunk_size = 16384  # 16KB chunks
-                    for i in range(0, len(buffered_data), chunk_size):
-                        chunk = buffered_data[i : i + chunk_size]
-                        await self._send_audio(bytes(chunk))
-                        await asyncio.sleep(0.01)  # Small delay between chunks
+            if self._audio_buffer:
+                logger.info(f"Sending {len(self._audio_buffer)} bytes of buffered audio")
+                await self._send_audio(bytes(self._audio_buffer))
 
     async def _send_stop_recording(self):
         if self._websocket and not self._websocket.closed: