✨ (config.py): add new configuration options for lip-sync optimization, context adaptation, and additional context to enhance translation accuracy
♻️ (stt.py): increase the default max buffer size from 5MB to 20MB to accommodate larger audio data
♻️ (stt.py): simplify the audio sending logic by removing chunking and sending the entire buffered audio at once for improved performance
This commit is contained in:
@@ -74,11 +74,17 @@ class TranslationConfig(BaseModel):
|
||||
target_languages: List of target language codes for translation
|
||||
model: Translation model to use ("base" or "enhanced")
|
||||
match_original_utterances: Whether to align translations with original utterances
|
||||
lipsync: Whether to enable lip-sync optimization for translations
|
||||
context_adaptation: Whether to enable context-aware translation adaptation
|
||||
context: Additional context to help with translation accuracy
|
||||
"""
|
||||
|
||||
target_languages: Optional[List[str]] = None
|
||||
model: Optional[str] = None
|
||||
match_original_utterances: Optional[bool] = None
|
||||
lipsync: Optional[bool] = None
|
||||
context_adaptation: Optional[bool] = None
|
||||
context: Optional[str] = None
|
||||
|
||||
|
||||
class RealtimeProcessingConfig(BaseModel):
|
||||
|
||||
@@ -197,7 +197,7 @@ class GladiaSTTService(STTService):
|
||||
params: Optional[GladiaInputParams] = None,
|
||||
max_reconnection_attempts: int = 5,
|
||||
reconnection_delay: float = 1.0,
|
||||
max_buffer_size: int = 1024 * 1024 * 5, # 5MB default buffer
|
||||
max_buffer_size: int = 1024 * 1024 * 20, # 20MB default buffer
|
||||
**kwargs,
|
||||
):
|
||||
"""Initialize the Gladia STT service.
|
||||
@@ -207,8 +207,7 @@ class GladiaSTTService(STTService):
|
||||
url: Gladia API URL
|
||||
confidence: Minimum confidence threshold for transcriptions
|
||||
sample_rate: Audio sample rate in Hz
|
||||
model: Model to use ("solaria-1", "solaria-mini-1", "fast",
|
||||
or "accurate")
|
||||
model: Model to use ("solaria-1")
|
||||
params: Additional configuration parameters
|
||||
max_reconnection_attempts: Maximum number of reconnection attempts
|
||||
reconnection_delay: Initial delay between reconnection attempts (exponential backoff)
|
||||
@@ -507,16 +506,9 @@ class GladiaSTTService(STTService):
|
||||
async def _send_buffered_audio(self):
|
||||
"""Send any buffered audio after reconnection."""
|
||||
async with self._buffer_lock:
|
||||
if self._bytes_sent < len(self._audio_buffer):
|
||||
buffered_data = self._audio_buffer[self._bytes_sent :]
|
||||
if buffered_data:
|
||||
logger.info(f"Sending {len(buffered_data)} bytes of buffered audio")
|
||||
# Send in chunks to avoid overwhelming the connection
|
||||
chunk_size = 16384 # 16KB chunks
|
||||
for i in range(0, len(buffered_data), chunk_size):
|
||||
chunk = buffered_data[i : i + chunk_size]
|
||||
await self._send_audio(bytes(chunk))
|
||||
await asyncio.sleep(0.01) # Small delay between chunks
|
||||
if self._audio_buffer:
|
||||
logger.info(f"Sending {len(self._audio_buffer)} bytes of buffered audio")
|
||||
await self._send_audio(bytes(self._audio_buffer))
|
||||
|
||||
async def _send_stop_recording(self):
|
||||
if self._websocket and not self._websocket.closed:
|
||||
|
||||
Reference in New Issue
Block a user