(config.py): add new configuration options for lip-sync optimization, context adaptation, and additional context to enhance translation accuracy

♻️ (stt.py): increase default max buffer size from 5MB to 20MB to accommodate larger audio data
♻️ (stt.py): simplify audio sending logic by removing chunking and sending the entire buffered audio at once for improved performance
This commit is contained in:
jqueguiner
2025-06-05 16:51:29 -07:00
parent 02cc6f3d56
commit 25ff8ef37b
2 changed files with 11 additions and 13 deletions

View File

@@ -74,11 +74,17 @@ class TranslationConfig(BaseModel):
target_languages: List of target language codes for translation
model: Translation model to use ("base" or "enhanced")
match_original_utterances: Whether to align translations with original utterances
lipsync: Whether to enable lip-sync optimization for translations
context_adaptation: Whether to enable context-aware translation adaptation
context: Additional context to help with translation accuracy
"""
target_languages: Optional[List[str]] = None
model: Optional[str] = None
match_original_utterances: Optional[bool] = None
lipsync: Optional[bool] = None
context_adaptation: Optional[bool] = None
context: Optional[str] = None
class RealtimeProcessingConfig(BaseModel):

View File

@@ -197,7 +197,7 @@ class GladiaSTTService(STTService):
params: Optional[GladiaInputParams] = None,
max_reconnection_attempts: int = 5,
reconnection_delay: float = 1.0,
max_buffer_size: int = 1024 * 1024 * 5, # 5MB default buffer
max_buffer_size: int = 1024 * 1024 * 20, # 20MB default buffer
**kwargs,
):
"""Initialize the Gladia STT service.
@@ -207,8 +207,7 @@ class GladiaSTTService(STTService):
url: Gladia API URL
confidence: Minimum confidence threshold for transcriptions
sample_rate: Audio sample rate in Hz
model: Model to use ("solaria-1", "solaria-mini-1", "fast",
or "accurate")
model: Model to use ("solaria-1")
params: Additional configuration parameters
max_reconnection_attempts: Maximum number of reconnection attempts
reconnection_delay: Initial delay between reconnection attempts (exponential backoff)
@@ -507,16 +506,9 @@ class GladiaSTTService(STTService):
async def _send_buffered_audio(self):
"""Send any buffered audio after reconnection."""
async with self._buffer_lock:
if self._bytes_sent < len(self._audio_buffer):
buffered_data = self._audio_buffer[self._bytes_sent :]
if buffered_data:
logger.info(f"Sending {len(buffered_data)} bytes of buffered audio")
# Send in chunks to avoid overwhelming the connection
chunk_size = 16384 # 16KB chunks
for i in range(0, len(buffered_data), chunk_size):
chunk = buffered_data[i : i + chunk_size]
await self._send_audio(bytes(chunk))
await asyncio.sleep(0.01) # Small delay between chunks
if self._audio_buffer:
logger.info(f"Sending {len(self._audio_buffer)} bytes of buffered audio")
await self._send_audio(bytes(self._audio_buffer))
async def _send_stop_recording(self):
if self._websocket and not self._websocket.closed: