From d3d50ac5805d5adc4da9e0e17d7d9dd9d245eaf2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= <aleix@daily.co>
Date: Wed, 17 Dec 2025 10:34:54 -0800
Subject: [PATCH 1/2] frames: added vision response and text frames

---
 changelog/3252.added.md      |  2 ++
 src/pipecat/frames/frames.py | 26 ++++++++++++++++++++++++++
 2 files changed, 28 insertions(+)
 create mode 100644 changelog/3252.added.md

diff --git a/changelog/3252.added.md b/changelog/3252.added.md
new file mode 100644
index 000000000..c6f85b713
--- /dev/null
+++ b/changelog/3252.added.md
@@ -0,0 +1,2 @@
+- Added `VisionFullResponseStartFrame`, `VisionFullResponseEndFrame` and
+  `VisionTextFrame`. These are used by vision services similar to LLM services.
diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py
index 8d2a51333..998ca3cf0 100644
--- a/src/pipecat/frames/frames.py
+++ b/src/pipecat/frames/frames.py
@@ -400,6 +400,13 @@ class AggregatedTextFrame(TextFrame):
     aggregated_by: AggregationType | str
 
 
+@dataclass
+class VisionTextFrame(LLMTextFrame):
+    """Text frame generated by vision services."""
+
+    pass
+
+
 @dataclass
 class TTSTextFrame(AggregatedTextFrame):
     """Text frame generated by Text-to-Speech services."""
@@ -1766,6 +1773,25 @@ class FunctionCallInProgressFrame(ControlFrame, UninterruptibleFrame):
     cancel_on_interruption: bool = False
 
 
+@dataclass
+class VisionFullResponseStartFrame(LLMFullResponseStartFrame):
+    """Frame indicating the beginning of a vision model response.
+
+    Used to indicate the beginning of a vision model response. Followed by one
+    or more VisionTextFrames and a final VisionFullResponseEndFrame.
+
+    """
+
+    pass
+
+
+@dataclass
+class VisionFullResponseEndFrame(LLMFullResponseEndFrame):
+    """Frame indicating the end of a vision model response."""
+
+    pass
+
+
 @dataclass
 class TTSStartedFrame(ControlFrame):
     """Frame indicating the beginning of a TTS response.

From 159e403ae4f9847adde8349664e173bfdb74a1c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= <aleix@daily.co>
Date: Wed, 17 Dec 2025 10:36:04 -0800
Subject: [PATCH 2/2] MoondreamService: yield vision response and text frames

---
 changelog/3252.changed.md                |  2 ++
 src/pipecat/services/moondream/vision.py | 12 ++++++------
 2 files changed, 8 insertions(+), 6 deletions(-)
 create mode 100644 changelog/3252.changed.md

diff --git a/changelog/3252.changed.md b/changelog/3252.changed.md
new file mode 100644
index 000000000..11cdca2ff
--- /dev/null
+++ b/changelog/3252.changed.md
@@ -0,0 +1,2 @@
+- `MoondreamService` now pushes `VisionFullResponseStartFrame`,
+  `VisionFullResponseEndFrame` and `VisionTextFrame`.
diff --git a/src/pipecat/services/moondream/vision.py b/src/pipecat/services/moondream/vision.py
index e9ce86383..a5dc5af4c 100644
--- a/src/pipecat/services/moondream/vision.py
+++ b/src/pipecat/services/moondream/vision.py
@@ -19,8 +19,10 @@ from PIL import Image
 from pipecat.frames.frames import (
     ErrorFrame,
     Frame,
-    TextFrame,
     UserImageRawFrame,
+    VisionFullResponseEndFrame,
+    VisionFullResponseStartFrame,
+    VisionTextFrame,
 )
 from pipecat.services.vision_service import VisionService
 
@@ -104,10 +106,6 @@ class MoondreamService(VisionService):
 
         Args:
             frame: The image frame to process.
-
-        Yields:
-            Frame: TextFrame containing the generated image description, or ErrorFrame
-                  if analysis fails.
         """
         if not self._model:
             yield ErrorFrame("Moondream model not available")
@@ -123,4 +121,6 @@ class MoondreamService(VisionService):
 
         description = await asyncio.to_thread(get_image_description, frame.image, frame.text)
 
-        yield TextFrame(text=description)
+        yield VisionFullResponseStartFrame()
+        yield VisionTextFrame(text=description)
+        yield VisionFullResponseEndFrame()