openai: remove from_image_frame() and use add_image_frame_message()

This commit is contained in:
Aleix Conchillo Flaqué
2024-12-10 17:08:12 -08:00
parent 246c825a82
commit a618bd3fa6
2 changed files with 8 additions and 28 deletions

View File

@@ -19,7 +19,6 @@ from pipecat.frames.frames import (
Frame,
FunctionCallInProgressFrame,
FunctionCallResultFrame,
VisionImageRawFrame,
)
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
@@ -71,28 +70,6 @@ class OpenAILLMContext:
context.add_message(message)
return context
# todo: deprecate from_image_frame. It's only used to create a single-use
# context, which isn't useful for most real-world applications.
@staticmethod
def from_image_frame(frame: VisionImageRawFrame) -> "OpenAILLMContext":
    """
    Build a new context holding a single user message made from an image frame.

    This deliberately deviates from the OpenAI messages shape: OpenAI expects
    base64-encoded images, but other vision models may not. The frame is
    therefore JPEG-encoded into an in-memory buffer and stored as-is under the
    "data" key, leaving any base64 encoding to the LLM service.

    NOTE: this only applies to the deprecated use of this method.
    add_image_frame_message() performs the base64 encoding itself and emits
    the regular OpenAI format.
    """
    # Encode the raw frame pixels as JPEG into an in-memory buffer.
    jpeg_buffer = io.BytesIO()
    pil_image = Image.frombytes(frame.format, frame.size, frame.image)
    pil_image.save(jpeg_buffer, format="JPEG")

    # The buffer object itself (not its bytes) is attached to the message.
    image_message = {
        "content": frame.text,
        "role": "user",
        "data": jpeg_buffer,
        "mime_type": "image/jpeg",
    }

    new_context = OpenAILLMContext()
    new_context.add_message(image_message)
    return new_context
@property
def messages(self) -> List[ChatCompletionMessageParam]:
    """Return the message list stored on this context (the same object, not a copy)."""
    return self._messages
@@ -167,12 +144,12 @@ class OpenAILLMContext:
Image.frombytes(format, size, image).save(buffer, format="JPEG")
encoded_image = base64.b64encode(buffer.getvalue()).decode("utf-8")
content = [
{"type": "text", "text": text},
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}},
]
content = []
if text:
content.append({"type": "text", "text": text})
content.append(
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}},
)
self.add_message({"role": "user", "content": content})
def add_audio_frames_message(self, *, audio_frames: list[AudioRawFrame], text: str = None):

View File

@@ -294,7 +294,10 @@ class BaseOpenAILLMService(LLMService):
elif isinstance(frame, LLMMessagesFrame):
context = OpenAILLMContext.from_messages(frame.messages)
elif isinstance(frame, VisionImageRawFrame):
context = OpenAILLMContext.from_image_frame(frame)
context = OpenAILLMContext()
context.add_image_frame_message(
format=frame.format, size=frame.size, image=frame.image, text=frame.text
)
elif isinstance(frame, LLMUpdateSettingsFrame):
await self._update_settings(frame.settings)
else: