Merge pull request #3252 from pipecat-ai/aleix/vision-response-frames

add vision response and text frames
This commit is contained in:
Aleix Conchillo Flaqué
2025-12-17 11:01:06 -08:00
committed by GitHub
4 changed files with 36 additions and 6 deletions

2
changelog/3252.added.md Normal file
View File

@@ -0,0 +1,2 @@
- Added `VisionFullResponseStartFrame`, `VisionFullResponseEndFrame` and
`VisionTextFrame`. These are used by vision services, similar to LLM services.

View File

@@ -0,0 +1,2 @@
- `MoondreamService` now pushes `VisionFullResponseStartFrame`,
`VisionFullResponseEndFrame` and `VisionTextFrame`.

View File

@@ -400,6 +400,13 @@ class AggregatedTextFrame(TextFrame):
aggregated_by: AggregationType | str
@dataclass
class VisionTextFrame(LLMTextFrame):
    """Text produced by a vision service.

    Subclasses ``LLMTextFrame`` without adding any fields of its own.
    """
@dataclass
class TTSTextFrame(AggregatedTextFrame):
"""Text frame generated by Text-to-Speech services."""
@@ -1766,6 +1773,25 @@ class FunctionCallInProgressFrame(ControlFrame, UninterruptibleFrame):
cancel_on_interruption: bool = False
@dataclass
class VisionFullResponseStartFrame(LLMFullResponseStartFrame):
    """Marks the start of a vision model response.

    Followed by one or more ``VisionTextFrame`` instances and then a closing
    ``VisionFullResponseEndFrame``. Subclasses ``LLMFullResponseStartFrame``
    without adding any fields of its own.
    """
@dataclass
class VisionFullResponseEndFrame(LLMFullResponseEndFrame):
    """Marks the end of a vision model response.

    Subclasses ``LLMFullResponseEndFrame`` without adding any fields of its
    own.
    """
@dataclass
class TTSStartedFrame(ControlFrame):
"""Frame indicating the beginning of a TTS response.

View File

@@ -19,8 +19,10 @@ from PIL import Image
from pipecat.frames.frames import (
ErrorFrame,
Frame,
TextFrame,
UserImageRawFrame,
VisionFullResponseEndFrame,
VisionFullResponseStartFrame,
VisionTextFrame,
)
from pipecat.services.vision_service import VisionService
@@ -104,10 +106,6 @@ class MoondreamService(VisionService):
Args:
frame: The image frame to process.
Yields:
Frame: TextFrame containing the generated image description, or ErrorFrame
if analysis fails.
"""
if not self._model:
yield ErrorFrame("Moondream model not available")
@@ -123,4 +121,6 @@ class MoondreamService(VisionService):
description = await asyncio.to_thread(get_image_description, frame.image, frame.text)
yield TextFrame(text=description)
yield VisionFullResponseStartFrame()
yield VisionTextFrame(text=description)
yield VisionFullResponseEndFrame()