Add sync_with_audio support for OutputImageRawFrame
Add a `sync_with_audio` field to `OutputImageRawFrame`. When the field is set, the output transport routes the image frame through its audio queue, so the image is displayed only after all preceding audio has been sent; when it is not set (the default), the image is displayed as soon as the output transport receives it. This enables proper audio/image synchronization in pipelines like the calendar month narration example. Update the 05-sync-speech-and-image example to use an `ImageAudioSync` processor that sets this flag on image frames.
This commit is contained in:
@@ -16,6 +16,7 @@ from pipecat.frames.frames import (
|
||||
Frame,
|
||||
LLMContextFrame,
|
||||
LLMFullResponseStartFrame,
|
||||
OutputImageRawFrame,
|
||||
TextFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -44,6 +45,18 @@ class MonthFrame(DataFrame):
|
||||
return f"{self.name}(month: {self.month})"
|
||||
|
||||
|
||||
class ImageAudioSync(FrameProcessor):
    """Flag outgoing image frames for audio-synchronized output.

    Every ``OutputImageRawFrame`` passing through this processor has its
    ``sync_with_audio`` attribute set to ``True``. All frames, image or
    not, are then forwarded unchanged in the direction they arrived.
    """

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Tag image frames, then forward the frame.

        Args:
            frame: The frame travelling through the pipeline.
            direction: The direction in which the frame is travelling.
        """
        # Let the base processor run its own handling first.
        await super().process_frame(frame, direction)

        is_output_image = isinstance(frame, OutputImageRawFrame)
        if is_output_image:
            # Only output images carry the sync flag; other frames are untouched.
            frame.sync_with_audio = True

        # Forward every frame, flagged or not.
        await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
class MonthPrepender(FrameProcessor):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
@@ -129,7 +142,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
sentence_aggregator, # Aggregates LLM output into full sentences
|
||||
SyncParallelPipeline( # Run pipelines in parallel aggregating the result
|
||||
[month_prepender, tts], # Create "Month: sentence" and output audio
|
||||
[imagegen], # Generate image
|
||||
[
|
||||
imagegen, # Generate image
|
||||
ImageAudioSync(), # Mark image as needing sync output w/audio
|
||||
],
|
||||
),
|
||||
transport.output(), # Transport output
|
||||
]
|
||||
|
||||
@@ -274,8 +274,16 @@ class OutputImageRawFrame(DataFrame, ImageRawFrame):
|
||||
An image that will be shown by the transport. If the transport supports
|
||||
multiple video destinations (e.g. multiple video tracks) the destination
|
||||
name can be specified in transport_destination.
|
||||
|
||||
Parameters:
|
||||
sync_with_audio: If True, the image is queued with audio frames so
|
||||
it is only displayed after all preceding audio has been sent.
|
||||
Defaults to False (image is displayed immediately when the output
|
||||
transport receives it).
|
||||
"""
|
||||
|
||||
sync_with_audio: bool = field(default=False, init=False)
|
||||
|
||||
def __str__(self):
|
||||
pts = format_pts(self.pts)
|
||||
return f"{self.name}(pts: {pts}, destination: {self.transport_destination}, size: {self.size}, format: {self.format})"
|
||||
|
||||
@@ -569,7 +569,11 @@ class BaseOutputTransport(FrameProcessor):
|
||||
if not self._params.video_out_enabled:
|
||||
return
|
||||
|
||||
if self._params.video_out_is_live and isinstance(frame, OutputImageRawFrame):
|
||||
if isinstance(frame, OutputImageRawFrame) and frame.sync_with_audio:
|
||||
# Route through the audio queue so the image is only
|
||||
# displayed after all preceding audio has been sent.
|
||||
await self._audio_queue.put(frame)
|
||||
elif self._params.video_out_is_live and isinstance(frame, OutputImageRawFrame):
|
||||
await self._video_queue.put(frame)
|
||||
elif isinstance(frame, OutputImageRawFrame):
|
||||
await self._set_video_image(frame)
|
||||
|
||||
Reference in New Issue
Block a user