openai: remove from_image_frame() and use add_image_frame_message()
This commit is contained in:
@@ -19,7 +19,6 @@ from pipecat.frames.frames import (
|
||||
Frame,
|
||||
FunctionCallInProgressFrame,
|
||||
FunctionCallResultFrame,
|
||||
VisionImageRawFrame,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
|
||||
@@ -71,28 +70,6 @@ class OpenAILLMContext:
|
||||
context.add_message(message)
|
||||
return context
|
||||
|
||||
# todo: deprecate from_image_frame. It's only used to create a single-use
|
||||
# context, which isn't useful for most real-world applications.
|
||||
@staticmethod
def from_image_frame(frame: VisionImageRawFrame) -> "OpenAILLMContext":
    """Build a fresh, single-message context from a raw vision image frame.

    The frame's pixel data is re-encoded as JPEG into an in-memory buffer,
    and that buffer object is attached to the message under the ``"data"``
    key with ``"mime_type"`` set to ``"image/jpeg"``. This intentionally
    deviates from the OpenAI message shape: OpenAI expects base64-encoded
    images, but other vision models may not, so no base64 encoding happens
    here and it is left to the LLM service to encode as needed.

    NOTE: the above applies only to this deprecated helper.
    ``add_image_frame_message()`` performs the base64 encoding expected by
    the OpenAI format.

    Args:
        frame: Vision frame supplying ``format``, ``size``, ``image`` and
            ``text``.

    Returns:
        A new ``OpenAILLMContext`` holding one ``"user"`` message.
    """
    new_context = OpenAILLMContext()

    # Re-encode the raw pixels as JPEG; the buffer itself (not its bytes)
    # is what gets stored on the message.
    jpeg_buffer = io.BytesIO()
    pil_image = Image.frombytes(frame.format, frame.size, frame.image)
    pil_image.save(jpeg_buffer, format="JPEG")

    image_message = {
        "content": frame.text,
        "role": "user",
        "data": jpeg_buffer,
        "mime_type": "image/jpeg",
    }
    new_context.add_message(image_message)
    return new_context
|
||||
|
||||
@property
def messages(self) -> List[ChatCompletionMessageParam]:
    """Return the message list held by this context.

    The internal list object is returned directly, not a copy.
    """
    return self._messages
|
||||
@@ -167,12 +144,12 @@ class OpenAILLMContext:
|
||||
Image.frombytes(format, size, image).save(buffer, format="JPEG")
|
||||
encoded_image = base64.b64encode(buffer.getvalue()).decode("utf-8")
|
||||
|
||||
content = [
|
||||
{"type": "text", "text": text},
|
||||
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}},
|
||||
]
|
||||
content = []
|
||||
if text:
|
||||
content.append({"type": "text", "text": text})
|
||||
content.append(
|
||||
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}},
|
||||
)
|
||||
self.add_message({"role": "user", "content": content})
|
||||
|
||||
def add_audio_frames_message(self, *, audio_frames: list[AudioRawFrame], text: str = None):
|
||||
|
||||
@@ -294,7 +294,10 @@ class BaseOpenAILLMService(LLMService):
|
||||
elif isinstance(frame, LLMMessagesFrame):
|
||||
context = OpenAILLMContext.from_messages(frame.messages)
|
||||
elif isinstance(frame, VisionImageRawFrame):
|
||||
context = OpenAILLMContext.from_image_frame(frame)
|
||||
context = OpenAILLMContext()
|
||||
context.add_image_frame_message(
|
||||
format=frame.format, size=frame.size, image=frame.image, text=frame.text
|
||||
)
|
||||
elif isinstance(frame, LLMUpdateSettingsFrame):
|
||||
await self._update_settings(frame.settings)
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user