diff --git a/core/duplex_pipeline.py b/core/duplex_pipeline.py index b80cc36..b6ae930 100644 --- a/core/duplex_pipeline.py +++ b/core/duplex_pipeline.py @@ -108,6 +108,8 @@ class DuplexPipeline: self._is_bot_speaking = False self._current_turn_task: Optional[asyncio.Task] = None self._audio_buffer: bytes = b"" + max_buffer_seconds = settings.max_audio_buffer_seconds if hasattr(settings, "max_audio_buffer_seconds") else 30 + self._max_audio_buffer_bytes = int(settings.sample_rate * 2 * max_buffer_seconds) self._last_vad_status: str = "Silence" # Interruption handling @@ -263,6 +265,9 @@ class DuplexPipeline: # 3. Buffer audio for ASR if vad_status == "Speech" or self.conversation.state == ConversationState.LISTENING: self._audio_buffer += pcm_bytes + if len(self._audio_buffer) > self._max_audio_buffer_bytes: + # Keep only the most recent audio to cap memory usage + self._audio_buffer = self._audio_buffer[-self._max_audio_buffer_bytes:] await self.asr_service.send_audio(pcm_bytes) # For SiliconFlow ASR, trigger interim transcription periodically