AudioBufferProcessor: add start_recording()/stop_recording()
This commit is contained in:
@@ -67,6 +67,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- `resample_audio()` is now deprecated, use `create_default_resampler()`
|
||||
instead.
|
||||
|
||||
### Removed
|
||||
|
||||
- `AudioBufferProcessor.reset_audio_buffers()` has been removed, use
|
||||
`AudioBufferProcessor.start_recording()` and
|
||||
``AudioBufferProcessor.stop_recording()` instead.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed a `AudioBufferProcessor` that would cause crackling in some recordings.
|
||||
|
||||
@@ -124,6 +124,7 @@ async def main():
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await audio_buffer_processor.start_recording()
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
|
||||
@@ -109,8 +109,9 @@ async def main():
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
# Save audio every 10 seconds.
|
||||
audiobuffer = AudioBufferProcessor(buffer_size=480000)
|
||||
# NOTE: Watch out! This will save all the conversation in memory. You
|
||||
# can pass `buffer_size` to get periodic callbacks.
|
||||
audiobuffer = AudioBufferProcessor()
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -132,6 +133,7 @@ async def main():
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await audiobuffer.start_recording()
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
|
||||
@@ -14,7 +14,6 @@ from pipecat.frames.frames import (
|
||||
Frame,
|
||||
InputAudioRawFrame,
|
||||
OutputAudioRawFrame,
|
||||
StartFrame,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
|
||||
@@ -47,6 +46,8 @@ class AudioBufferProcessor(FrameProcessor):
|
||||
self._last_user_frame_at = 0
|
||||
self._last_bot_frame_at = 0
|
||||
|
||||
self._recording = False
|
||||
|
||||
self._resampler = create_default_resampler()
|
||||
|
||||
self._register_event_handler("on_audio_data")
|
||||
@@ -74,21 +75,18 @@ class AudioBufferProcessor(FrameProcessor):
|
||||
else:
|
||||
return b""
|
||||
|
||||
def reset_audio_buffers(self):
|
||||
self._user_audio_buffer = bytearray()
|
||||
self._bot_audio_buffer = bytearray()
|
||||
async def start_recording(self):
|
||||
self._recording = True
|
||||
self._reset_recording()
|
||||
|
||||
self._last_user_frame_at = time.time()
|
||||
self._last_bot_frame_at = time.time()
|
||||
async def stop_recording(self):
|
||||
await self._call_on_audio_data_handler()
|
||||
self._recording = False
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, StartFrame):
|
||||
self._last_user_frame_at = time.time()
|
||||
self._last_bot_frame_at = time.time()
|
||||
|
||||
if isinstance(frame, InputAudioRawFrame):
|
||||
if self._recording and isinstance(frame, InputAudioRawFrame):
|
||||
# Add silence if we need to.
|
||||
silence = self._compute_silence(self._last_user_frame_at)
|
||||
self._user_audio_buffer.extend(silence)
|
||||
@@ -97,7 +95,7 @@ class AudioBufferProcessor(FrameProcessor):
|
||||
self._user_audio_buffer.extend(resampled)
|
||||
# Save time of frame so we can compute silence.
|
||||
self._last_user_frame_at = time.time()
|
||||
elif isinstance(frame, OutputAudioRawFrame):
|
||||
elif self._recording and isinstance(frame, OutputAudioRawFrame):
|
||||
# Add silence if we need to.
|
||||
silence = self._compute_silence(self._last_bot_frame_at)
|
||||
self._bot_audio_buffer.extend(silence)
|
||||
@@ -111,23 +109,32 @@ class AudioBufferProcessor(FrameProcessor):
|
||||
await self._call_on_audio_data_handler()
|
||||
|
||||
if isinstance(frame, (CancelFrame, EndFrame)):
|
||||
await self._call_on_audio_data_handler()
|
||||
await self.stop_recording()
|
||||
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
async def _call_on_audio_data_handler(self):
|
||||
if not self.has_audio():
|
||||
if not self.has_audio() or not self._recording:
|
||||
return
|
||||
|
||||
merged_audio = self.merge_audio_buffers()
|
||||
await self._call_event_handler(
|
||||
"on_audio_data", merged_audio, self._sample_rate, self._num_channels
|
||||
)
|
||||
self.reset_audio_buffers()
|
||||
self._reset_audio_buffers()
|
||||
|
||||
def _buffer_has_audio(self, buffer: bytearray) -> bool:
|
||||
return buffer is not None and len(buffer) > 0
|
||||
|
||||
def _reset_recording(self):
|
||||
self._reset_audio_buffers()
|
||||
self._last_user_frame_at = time.time()
|
||||
self._last_bot_frame_at = time.time()
|
||||
|
||||
def _reset_audio_buffers(self):
|
||||
self._user_audio_buffer = bytearray()
|
||||
self._bot_audio_buffer = bytearray()
|
||||
|
||||
async def _resample_audio(self, frame: AudioRawFrame) -> bytes:
|
||||
return await self._resampler.resample(frame.audio, frame.sample_rate, self._sample_rate)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user