✨ (config.py): add new configuration options for lip-sync optimization, context adaptation, and additional context to enhance translation accuracy

♻️ (stt.py): increase default max buffer size from 5MB to 20MB to accommodate larger audio data
♻️ (stt.py): simplify audio sending logic by removing chunking and sending the entire buffered audio at once for improved performance
2025-06-05 16:51:29 -07:00
parent 02cc6f3d56
commit 25ff8ef37b
2 changed files with 11 additions and 13 deletions
--- a/src/pipecat/services/gladia/config.py
+++ b/src/pipecat/services/gladia/config.py
@@ -74,11 +74,17 @@ class TranslationConfig(BaseModel):
        target_languages: List of target language codes for translation
        model: Translation model to use ("base" or "enhanced")
        match_original_utterances: Whether to align translations with original utterances
+        lipsync: Whether to enable lip-sync optimization for translations
+        context_adaptation: Whether to enable context-aware translation adaptation
+        context: Additional context to help with translation accuracy
    """

    target_languages: Optional[List[str]] = None
    model: Optional[str] = None
    match_original_utterances: Optional[bool] = None
+    lipsync: Optional[bool] = None
+    context_adaptation: Optional[bool] = None
+    context: Optional[str] = None


 class RealtimeProcessingConfig(BaseModel):
--- a/src/pipecat/services/gladia/stt.py
+++ b/src/pipecat/services/gladia/stt.py
@@ -197,7 +197,7 @@ class GladiaSTTService(STTService):
        params: Optional[GladiaInputParams] = None,
        max_reconnection_attempts: int = 5,
        reconnection_delay: float = 1.0,
-        max_buffer_size: int = 1024 * 1024 * 5,  # 5MB default buffer
+        max_buffer_size: int = 1024 * 1024 * 20,  # 20MB default buffer
        **kwargs,
    ):
        """Initialize the Gladia STT service.
@@ -207,8 +207,7 @@ class GladiaSTTService(STTService):
            url: Gladia API URL
            confidence: Minimum confidence threshold for transcriptions
            sample_rate: Audio sample rate in Hz
-            model: Model to use ("solaria-1", "solaria-mini-1", "fast",
-                or "accurate")
+            model: Model to use ("solaria-1")
            params: Additional configuration parameters
            max_reconnection_attempts: Maximum number of reconnection attempts
            reconnection_delay: Initial delay between reconnection attempts (exponential backoff)
@@ -507,16 +506,9 @@ class GladiaSTTService(STTService):
    async def _send_buffered_audio(self):
        """Send any buffered audio after reconnection."""
        async with self._buffer_lock:
-            if self._bytes_sent < len(self._audio_buffer):
-                buffered_data = self._audio_buffer[self._bytes_sent :]
-                if buffered_data:
-                    logger.info(f"Sending {len(buffered_data)} bytes of buffered audio")
-                    # Send in chunks to avoid overwhelming the connection
-                    chunk_size = 16384  # 16KB chunks
-                    for i in range(0, len(buffered_data), chunk_size):
-                        chunk = buffered_data[i : i + chunk_size]
-                        await self._send_audio(bytes(chunk))
-                        await asyncio.sleep(0.01)  # Small delay between chunks
+            if self._audio_buffer:
+                logger.info(f"Sending {len(self._audio_buffer)} bytes of buffered audio")
+                await self._send_audio(bytes(self._audio_buffer))

    async def _send_stop_recording(self):
        if self._websocket and not self._websocket.closed: