Allowing to buffer audio inside the base output transport.
This commit is contained in:
@@ -823,6 +823,23 @@ class BaseOutputTransport(FrameProcessor):
|
||||
|
||||
async def _audio_task_handler(self):
|
||||
"""Main audio processing task handler."""
|
||||
# Pre-buffer: accumulate audio before sending anything to the transport.
|
||||
#
|
||||
# prebuffer is a list while we are still accumulating, and None once the
|
||||
# threshold has been reached and all held frames have been flushed. Using
|
||||
# None as the sentinel avoids a boolean flag and makes the steady-state
|
||||
# branch a simple identity check.
|
||||
#
|
||||
# The pre-buffer resets automatically on each interruption because the
|
||||
# audio task is cancelled and recreated, giving the next utterance a fresh
|
||||
# local variable.
|
||||
min_prebuffer_bytes = (
|
||||
int(self._sample_rate * self._params.audio_out_prebuffer_secs)
|
||||
* 2
|
||||
* self._params.audio_out_channels
|
||||
)
|
||||
prebuffer: list[OutputAudioRawFrame] | None = [] if min_prebuffer_bytes > 0 else None
|
||||
|
||||
async for frame in self._next_frame():
|
||||
# No need to push EndFrame, it's pushed from process_frame().
|
||||
if isinstance(frame, EndFrame):
|
||||
@@ -840,7 +857,20 @@ class BaseOutputTransport(FrameProcessor):
|
||||
# Try to send audio to the transport.
|
||||
try:
|
||||
if isinstance(frame, OutputAudioRawFrame):
|
||||
push_downstream = await self._transport.write_audio_frame(frame)
|
||||
if prebuffer is not None:
|
||||
# Accumulation phase: hold frames until we have enough audio.
|
||||
prebuffer.append(frame)
|
||||
if sum(len(f.audio) for f in prebuffer) >= min_prebuffer_bytes:
|
||||
# Threshold reached: flush all held frames at once, then
|
||||
# switch to direct-write mode for the rest of the utterance.
|
||||
for f in prebuffer:
|
||||
await self._transport.write_audio_frame(f)
|
||||
prebuffer = None
|
||||
# push_downstream stays True so frames flow through the
|
||||
# pipeline even while we are still accumulating.
|
||||
else:
|
||||
# Steady-state: write directly to the transport.
|
||||
push_downstream = await self._transport.write_audio_frame(frame)
|
||||
except Exception as e:
|
||||
logger.error(f"{self} Error writing {frame} to transport: {e}")
|
||||
push_downstream = False
|
||||
|
||||
@@ -34,6 +34,8 @@ class TransportParams(BaseModel):
|
||||
audio_out_mixer: Audio mixer instance or destination mapping.
|
||||
audio_out_destinations: List of audio output destination identifiers.
|
||||
audio_out_end_silence_secs: How much silence to send after an EndFrame (0 for no silence).
|
||||
audio_out_prebuffer_secs: Seconds of audio to accumulate before sending anything to the
|
||||
transport. Resets automatically on each interruption. Defaults to 0.0 (disabled).
|
||||
audio_out_auto_silence: Insert silence frames when the audio output queue is empty.
|
||||
When False, the transport will wait for audio data instead of inserting silence.
|
||||
audio_in_enabled: Enable audio input streaming.
|
||||
@@ -70,6 +72,7 @@ class TransportParams(BaseModel):
|
||||
audio_out_mixer: BaseAudioMixer | Mapping[str | None, BaseAudioMixer] | None = None
|
||||
audio_out_destinations: list[str] = Field(default_factory=list)
|
||||
audio_out_end_silence_secs: int = 2
|
||||
audio_out_prebuffer_secs: float = 0.0
|
||||
audio_out_auto_silence: bool = True
|
||||
audio_in_enabled: bool = False
|
||||
audio_in_sample_rate: int | None = None
|
||||
|
||||
@@ -168,11 +168,15 @@ class TavusParams(DailyParams):
|
||||
audio_in_enabled: Whether to enable audio input from participants.
|
||||
audio_out_enabled: Whether to enable audio output to participants.
|
||||
microphone_out_enabled: Whether to enable microphone output track.
|
||||
audio_out_prebuffer_secs: Seconds of audio to accumulate before sending to WebRTC.
|
||||
Absorbs TTS jitter to prevent the WebRTC jitter buffer from injecting silence.
|
||||
Defaults to 0.5.
|
||||
"""
|
||||
|
||||
audio_in_enabled: bool = True
|
||||
audio_out_enabled: bool = True
|
||||
microphone_out_enabled: bool = False
|
||||
audio_out_prebuffer_secs: float = 0.5
|
||||
|
||||
|
||||
class TavusTransportClient:
|
||||
|
||||
Reference in New Issue
Block a user