From 25ff8ef37bc3e83e417f7141345143bd16cf38a1 Mon Sep 17 00:00:00 2001 From: jqueguiner Date: Thu, 5 Jun 2025 16:51:29 -0700 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20(config.py):=20add=20new=20configur?= =?UTF-8?q?ation=20options=20for=20lip-sync=20optimization,=20context=20ad?= =?UTF-8?q?aptation,=20and=20additional=20context=20to=20enhance=20transla?= =?UTF-8?q?tion=20accuracy=20=E2=99=BB=EF=B8=8F=20(stt.py):=20increase=20d?= =?UTF-8?q?efault=20max=20buffer=20size=20from=205MB=20to=2020MB=20to=20ac?= =?UTF-8?q?commodate=20larger=20audio=20data=20=E2=99=BB=EF=B8=8F=20(stt.p?= =?UTF-8?q?y):=20simplify=20audio=20sending=20logic=20by=20removing=20chun?= =?UTF-8?q?king=20and=20sending=20the=20entire=20buffered=20audio=20at=20o?= =?UTF-8?q?nce=20for=20improved=20performance?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/pipecat/services/gladia/config.py | 6 ++++++ src/pipecat/services/gladia/stt.py | 18 +++++------------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/pipecat/services/gladia/config.py b/src/pipecat/services/gladia/config.py index 275554418..1e325686f 100644 --- a/src/pipecat/services/gladia/config.py +++ b/src/pipecat/services/gladia/config.py @@ -74,11 +74,17 @@ class TranslationConfig(BaseModel): target_languages: List of target language codes for translation model: Translation model to use ("base" or "enhanced") match_original_utterances: Whether to align translations with original utterances + lipsync: Whether to enable lip-sync optimization for translations + context_adaptation: Whether to enable context-aware translation adaptation + context: Additional context to help with translation accuracy """ target_languages: Optional[List[str]] = None model: Optional[str] = None match_original_utterances: Optional[bool] = None + lipsync: Optional[bool] = None + context_adaptation: Optional[bool] = None + context: Optional[str] = None class RealtimeProcessingConfig(BaseModel): diff --git a/src/pipecat/services/gladia/stt.py b/src/pipecat/services/gladia/stt.py index b07fd0345..20eafc393 100644 --- a/src/pipecat/services/gladia/stt.py +++ b/src/pipecat/services/gladia/stt.py @@ -197,7 +197,7 @@ class GladiaSTTService(STTService): params: Optional[GladiaInputParams] = None, max_reconnection_attempts: int = 5, reconnection_delay: float = 1.0, - max_buffer_size: int = 1024 * 1024 * 5, # 5MB default buffer + max_buffer_size: int = 1024 * 1024 * 20, # 20MB default buffer **kwargs, ): """Initialize the Gladia STT service. @@ -207,8 +207,7 @@ class GladiaSTTService(STTService): url: Gladia API URL confidence: Minimum confidence threshold for transcriptions sample_rate: Audio sample rate in Hz - model: Model to use ("solaria-1", "solaria-mini-1", "fast", - or "accurate") + model: Model to use ("solaria-1") params: Additional configuration parameters max_reconnection_attempts: Maximum number of reconnection attempts reconnection_delay: Initial delay between reconnection attempts (exponential backoff) @@ -507,16 +506,9 @@ class GladiaSTTService(STTService): async def _send_buffered_audio(self): """Send any buffered audio after reconnection.""" async with self._buffer_lock: - if self._bytes_sent < len(self._audio_buffer): - buffered_data = self._audio_buffer[self._bytes_sent :] - if buffered_data: - logger.info(f"Sending {len(buffered_data)} bytes of buffered audio") - # Send in chunks to avoid overwhelming the connection - chunk_size = 16384 # 16KB chunks - for i in range(0, len(buffered_data), chunk_size): - chunk = buffered_data[i : i + chunk_size] - await self._send_audio(bytes(chunk)) - await asyncio.sleep(0.01) # Small delay between chunks + if self._audio_buffer: + logger.info(f"Sending {len(self._audio_buffer)} bytes of buffered audio") + await self._send_audio(bytes(self._audio_buffer)) async def _send_stop_recording(self): if self._websocket and not self._websocket.closed: