first working llama-vision

This commit is contained in:
Kwindla Hultman Kramer
2024-08-29 11:19:00 -07:00
parent 84d72c0d5c
commit 58f3965cdc
2 changed files with 29 additions and 4 deletions

View File

@@ -48,7 +48,7 @@ class MoondreamService(VisionService):
self,
*,
model="vikhyatk/moondream2",
revision="2024-04-02",
revision="2024-08-26",
use_cpu=False
):
super().__init__()

View File

@@ -181,7 +181,11 @@ class TogetherLLMService(LLMService):
# We get here if the LLM returns a function call with invalid JSON arguments. This could happen
# because of LLM non-determinism, or maybe more often because of user error in the prompt.
# Should we do anything more than log a warning?
logger.debug(f"Error parsing function arguments: {error}")
logger.debug(
f"Error parsing function arguments: {error} - {function_call_accumulator}")
# Error parsing function arguments: Extra data: line 1 column 23 (char 22)
# - <function=get_current_weather>{"location": "London"}"</function>
class TogetherLLMContext(OpenAILLMContext):
@@ -190,6 +194,7 @@ class TogetherLLMContext(OpenAILLMContext):
messages: list[dict] | None = None,
):
super().__init__(messages=messages)
self._user_image_request_context = {}
@classmethod
def from_openai_context(cls, openai_context: OpenAILLMContext):
@@ -219,9 +224,17 @@ class TogetherUserContextAggregator(LLMUserContextAggregator):
if isinstance(context, OpenAILLMContext):
self._context = TogetherLLMContext.from_openai_context(context)
def get_messages_frame(self):
return OpenAILLMContextFrame(self._context)
async def push_messages_frame(self):
frame = OpenAILLMContextFrame(self._context)
await self.push_frame(frame)
await self.push_frame(self.get_messages_frame())
def append_image_description_tool_message(self, description):
self._context.add_message({
"role": "tool",
"content": json.dumps({"image_description": description})
})
async def process_frame(self, frame, direction):
await super().process_frame(frame, direction)
@@ -244,6 +257,18 @@ class TogetherUserContextAggregator(LLMUserContextAggregator):
else:
if frame.user_id in self._context._user_image_request_context:
del self._context._user_image_request_context[frame.user_id]
elif isinstance(frame, UserImageRawFrame):
text = self._context._user_image_request_context.get(frame.user_id) or ""
if text:
del self._context._user_image_request_context[frame.user_id]
frame = VisionImageRawFrame(
image=frame.image,
size=frame.size,
format=frame.format,
text=text,
)
await self.push_frame(frame)
except Exception as e:
logger.error(f"Error processing frame: {e}")