feat: support base64 and upload image input modes in chat

Add a unified way to send images in chat: inline base64 data URLs
(passed through natively) or auto-upload via image_input_mode="upload",
which replaces inline data URLs with hosted URLs using upload_chat_image.

- New fastgpt_client/images.py with content-part / data-URL helpers
- image_input_mode + appId/outLinkAuthData params on create_chat_completion
  (sync and async); upload failures fall back to inline base64
- Tests covering helpers, both modes, validation, and fallback

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Xin Wang
2026-06-04 09:50:37 +08:00
parent 4e980a3dd0
commit 07f30af105
5 changed files with 495 additions and 0 deletions

View File

@@ -17,6 +17,14 @@ from fastgpt_client.exceptions import (
StreamParseError, StreamParseError,
ValidationError, ValidationError,
) )
from fastgpt_client.images import (
decode_data_url,
encode_image_data_url,
image_part_from_bytes,
image_part_from_path,
image_url_part,
is_data_url,
)
from fastgpt_client.streaming import aiter_stream_events, iter_stream_events from fastgpt_client.streaming import aiter_stream_events, iter_stream_events
from fastgpt_client.stream_types import FastGPTInteractiveEvent, FastGPTStreamEvent from fastgpt_client.stream_types import FastGPTInteractiveEvent, FastGPTStreamEvent
@@ -41,6 +49,13 @@ __all__ = [
"aiter_stream_events", "aiter_stream_events",
"FastGPTStreamEvent", "FastGPTStreamEvent",
"FastGPTInteractiveEvent", "FastGPTInteractiveEvent",
# Image helpers
"encode_image_data_url",
"decode_data_url",
"is_data_url",
"image_url_part",
"image_part_from_bytes",
"image_part_from_path",
] ]
__version__ = "0.1.0" __version__ = "0.1.0"

View File

@@ -3,6 +3,8 @@
import asyncio import asyncio
import logging import logging
import mimetypes import mimetypes
import os
import tempfile
from pathlib import Path from pathlib import Path
import weakref import weakref
from typing import Any, Dict, Literal, Union from typing import Any, Dict, Literal, Union
@@ -11,6 +13,7 @@ import httpx
from .base_client import BaseClientMixin from .base_client import BaseClientMixin
from .exceptions import APIError, AuthenticationError, RateLimitError, ValidationError from .exceptions import APIError, AuthenticationError, RateLimitError, ValidationError
from .images import decode_data_url, image_url_part, is_data_url
class AsyncFastGPTClient(BaseClientMixin): class AsyncFastGPTClient(BaseClientMixin):
@@ -317,6 +320,10 @@ class AsyncChatClient(AsyncFastGPTClient):
detail: bool = False, detail: bool = False,
variables: dict[str, Any] | None = None, variables: dict[str, Any] | None = None,
responseChatItemId: str | None = None, responseChatItemId: str | None = None,
*,
image_input_mode: Literal["base64", "upload"] = "base64",
appId: str | None = None,
outLinkAuthData: dict[str, Any] | None = None,
): ):
"""Create a chat completion. """Create a chat completion.
@@ -327,12 +334,35 @@ class AsyncChatClient(AsyncFastGPTClient):
detail: Whether to return detailed response data detail: Whether to return detailed response data
variables: Template variables for substitution variables: Template variables for substitution
responseChatItemId: Custom ID for the response message responseChatItemId: Custom ID for the response message
image_input_mode: How to deliver inline base64 ``image_url`` parts.
``"base64"`` (default) sends the data URL as-is. ``"upload"``
uploads each inline data URL via :meth:`upload_chat_image` and
replaces it with the hosted URL (requires ``appId`` and
``chatId``). Image parts that already reference a plain URL are
left untouched in both modes.
appId: Application ID, required when ``image_input_mode="upload"``.
outLinkAuthData: Optional share-link auth payload forwarded to the
upload requests in ``"upload"`` mode.
Returns: Returns:
httpx.Response object httpx.Response object
""" """
self._validate_params(messages=messages) self._validate_params(messages=messages)
if image_input_mode == "upload":
if not appId or not chatId:
raise ValidationError(
"image_input_mode='upload' requires both appId and chatId"
)
messages = await self._resolve_image_inputs(
messages,
appId=appId,
chatId=chatId,
outLinkAuthData=outLinkAuthData,
)
elif image_input_mode != "base64":
raise ValidationError("image_input_mode must be 'base64' or 'upload'")
data = { data = {
"messages": messages, "messages": messages,
"stream": stream, "stream": stream,
@@ -592,6 +622,95 @@ class AsyncChatClient(AsyncFastGPTClient):
outLinkAuthData=outLinkAuthData, outLinkAuthData=outLinkAuthData,
) )
async def _resolve_image_inputs(
    self,
    messages: list[dict],
    *,
    appId: str,
    chatId: str,
    outLinkAuthData: dict[str, Any] | None = None,
) -> list[dict]:
    """Upload inline base64 ``image_url`` parts and swap in the hosted URLs.

    Only messages whose ``content`` is a list are inspected; within them,
    only ``image_url`` parts whose URL is an inline data URL are rewritten.
    Everything else is passed through as the same object.

    Args:
        messages: Chat messages to scan.
        appId: Application ID forwarded to the upload call.
        chatId: Chat ID forwarded to the upload call.
        outLinkAuthData: Optional share-link auth payload forwarded to the
            upload call.

    Returns:
        A new list of messages. Messages and parts that needed rewriting are
        shallow copies, so the input ``messages`` are never mutated.
    """
    resolved: list[dict] = []
    for message in messages:
        content = message.get("content")
        if not isinstance(content, list):
            resolved.append(message)
            continue

        new_content: list[Any] = []
        for part in content:
            is_image_part = isinstance(part, dict) and part.get("type") == "image_url"
            image = part.get("image_url") if is_image_part else None
            # A malformed part (``image_url`` missing or not a dict) is passed
            # through untouched instead of raising AttributeError here.
            url = image.get("url") if isinstance(image, dict) else None
            if not is_data_url(url):
                new_content.append(part)
                continue

            hosted_url = await self._upload_data_url(
                url,
                appId=appId,
                chatId=chatId,
                outLinkAuthData=outLinkAuthData,
            )
            # Replace only the URL so sibling keys (for example ``detail``
            # inside ``image_url``) survive the swap.
            new_content.append({**part, "image_url": {**image, "url": hosted_url}})
        resolved.append({**message, "content": new_content})
    return resolved
async def _upload_data_url(
    self,
    data_url: str,
    *,
    appId: str,
    chatId: str,
    outLinkAuthData: dict[str, Any] | None = None,
) -> str:
    """Upload a ``data:image/...;base64,...`` URL and return the hosted URL.

    The decoded bytes are written to a temporary file, handed to
    :meth:`upload_chat_image`, and the temporary file is removed afterwards.

    Args:
        data_url: Inline base64 data URL to upload.
        appId: Application ID forwarded to the upload call.
        chatId: Chat ID forwarded to the upload call.
        outLinkAuthData: Optional share-link auth payload forwarded to the
            upload call.

    Returns:
        The hosted URL reported by the upload, or the original ``data_url``
        when decoding fails, the upload raises, or the result carries no
        usable ``url`` — so the request can still proceed with inline base64.
    """
    try:
        mime_type, raw = decode_data_url(data_url)
    except ValueError as exc:
        self.logger.warning("Skipping image upload; invalid base64 data URL: %s", exc)
        return data_url

    suffix = mimetypes.guess_extension(mime_type) or ".jpg"
    tmp_path: str | None = None
    try:
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
            # Record the path before writing so the ``finally`` clause still
            # removes the file if the write itself fails.
            tmp_path = tmp.name
            tmp.write(raw)
        # Upload only after the file is closed so the bytes are flushed and
        # the path can be reopened by name.
        result = await self.upload_chat_image(
            appId=appId,
            chatId=chatId,
            file_path=tmp_path,
            outLinkAuthData=outLinkAuthData,
        )
        url = result.get("url") if isinstance(result, dict) else None
        if isinstance(url, str) and url:
            return url
        self.logger.warning("Image upload returned no url; using inline base64")
        return data_url
    except Exception as exc:  # noqa: BLE001 - graceful fallback to inline base64
        self.logger.warning("Image upload failed; using inline base64: %s", exc)
        return data_url
    finally:
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError as exc:
                # Best-effort cleanup: never fail the request over a temp file.
                self.logger.debug(
                    "Could not remove temporary image file %s: %s", tmp_path, exc
                )
async def get_chat_histories( async def get_chat_histories(
self, self,
appId: str, appId: str,

View File

@@ -2,6 +2,8 @@
import logging import logging
import mimetypes import mimetypes
import os
import tempfile
from pathlib import Path from pathlib import Path
import weakref import weakref
from typing import Any, Dict, Literal, Union from typing import Any, Dict, Literal, Union
@@ -10,6 +12,7 @@ import httpx
from .base_client import BaseClientMixin from .base_client import BaseClientMixin
from .exceptions import APIError, AuthenticationError, RateLimitError, ValidationError from .exceptions import APIError, AuthenticationError, RateLimitError, ValidationError
from .images import decode_data_url, image_url_part, is_data_url
class FastGPTClient(BaseClientMixin): class FastGPTClient(BaseClientMixin):
@@ -261,6 +264,10 @@ class ChatClient(FastGPTClient):
detail: bool = False, detail: bool = False,
variables: dict[str, Any] | None = None, variables: dict[str, Any] | None = None,
responseChatItemId: str | None = None, responseChatItemId: str | None = None,
*,
image_input_mode: Literal["base64", "upload"] = "base64",
appId: str | None = None,
outLinkAuthData: dict[str, Any] | None = None,
): ):
"""Create a chat completion. """Create a chat completion.
@@ -271,12 +278,35 @@ class ChatClient(FastGPTClient):
detail: Whether to return detailed response data detail: Whether to return detailed response data
variables: Template variables for substitution variables: Template variables for substitution
responseChatItemId: Custom ID for the response message responseChatItemId: Custom ID for the response message
image_input_mode: How to deliver inline base64 ``image_url`` parts.
``"base64"`` (default) sends the data URL as-is. ``"upload"``
uploads each inline data URL via :meth:`upload_chat_image` and
replaces it with the hosted URL (requires ``appId`` and
``chatId``). Image parts that already reference a plain URL are
left untouched in both modes.
appId: Application ID, required when ``image_input_mode="upload"``.
outLinkAuthData: Optional share-link auth payload forwarded to the
upload requests in ``"upload"`` mode.
Returns: Returns:
httpx.Response object httpx.Response object
""" """
self._validate_params(messages=messages) self._validate_params(messages=messages)
if image_input_mode == "upload":
if not appId or not chatId:
raise ValidationError(
"image_input_mode='upload' requires both appId and chatId"
)
messages = self._resolve_image_inputs(
messages,
appId=appId,
chatId=chatId,
outLinkAuthData=outLinkAuthData,
)
elif image_input_mode != "base64":
raise ValidationError("image_input_mode must be 'base64' or 'upload'")
data = { data = {
"messages": messages, "messages": messages,
"stream": stream, "stream": stream,
@@ -546,6 +576,95 @@ class ChatClient(FastGPTClient):
outLinkAuthData=outLinkAuthData, outLinkAuthData=outLinkAuthData,
) )
def _resolve_image_inputs(
    self,
    messages: list[dict],
    *,
    appId: str,
    chatId: str,
    outLinkAuthData: dict[str, Any] | None = None,
) -> list[dict]:
    """Upload inline base64 ``image_url`` parts and swap in the hosted URLs.

    Only messages whose ``content`` is a list are inspected; within them,
    only ``image_url`` parts whose URL is an inline data URL are rewritten.
    Everything else is passed through as the same object.

    Args:
        messages: Chat messages to scan.
        appId: Application ID forwarded to the upload call.
        chatId: Chat ID forwarded to the upload call.
        outLinkAuthData: Optional share-link auth payload forwarded to the
            upload call.

    Returns:
        A new list of messages. Messages and parts that needed rewriting are
        shallow copies, so the input ``messages`` are never mutated.
    """
    resolved: list[dict] = []
    for message in messages:
        content = message.get("content")
        if not isinstance(content, list):
            resolved.append(message)
            continue

        new_content: list[Any] = []
        for part in content:
            is_image_part = isinstance(part, dict) and part.get("type") == "image_url"
            image = part.get("image_url") if is_image_part else None
            # A malformed part (``image_url`` missing or not a dict) is passed
            # through untouched instead of raising AttributeError here.
            url = image.get("url") if isinstance(image, dict) else None
            if not is_data_url(url):
                new_content.append(part)
                continue

            hosted_url = self._upload_data_url(
                url,
                appId=appId,
                chatId=chatId,
                outLinkAuthData=outLinkAuthData,
            )
            # Replace only the URL so sibling keys (for example ``detail``
            # inside ``image_url``) survive the swap.
            new_content.append({**part, "image_url": {**image, "url": hosted_url}})
        resolved.append({**message, "content": new_content})
    return resolved
def _upload_data_url(
    self,
    data_url: str,
    *,
    appId: str,
    chatId: str,
    outLinkAuthData: dict[str, Any] | None = None,
) -> str:
    """Upload a ``data:image/...;base64,...`` URL and return the hosted URL.

    The decoded bytes are written to a temporary file, handed to
    :meth:`upload_chat_image`, and the temporary file is removed afterwards.

    Args:
        data_url: Inline base64 data URL to upload.
        appId: Application ID forwarded to the upload call.
        chatId: Chat ID forwarded to the upload call.
        outLinkAuthData: Optional share-link auth payload forwarded to the
            upload call.

    Returns:
        The hosted URL reported by the upload, or the original ``data_url``
        when decoding fails, the upload raises, or the result carries no
        usable ``url`` — so the request can still proceed with inline base64.
    """
    try:
        mime_type, raw = decode_data_url(data_url)
    except ValueError as exc:
        self.logger.warning("Skipping image upload; invalid base64 data URL: %s", exc)
        return data_url

    suffix = mimetypes.guess_extension(mime_type) or ".jpg"
    tmp_path: str | None = None
    try:
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
            # Record the path before writing so the ``finally`` clause still
            # removes the file if the write itself fails.
            tmp_path = tmp.name
            tmp.write(raw)
        # Upload only after the file is closed so the bytes are flushed and
        # the path can be reopened by name.
        result = self.upload_chat_image(
            appId=appId,
            chatId=chatId,
            file_path=tmp_path,
            outLinkAuthData=outLinkAuthData,
        )
        url = result.get("url") if isinstance(result, dict) else None
        if isinstance(url, str) and url:
            return url
        self.logger.warning("Image upload returned no url; using inline base64")
        return data_url
    except Exception as exc:  # noqa: BLE001 - graceful fallback to inline base64
        self.logger.warning("Image upload failed; using inline base64: %s", exc)
        return data_url
    finally:
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError as exc:
                # Best-effort cleanup: never fail the request over a temp file.
                self.logger.debug(
                    "Could not remove temporary image file %s: %s", tmp_path, exc
                )
def get_chat_histories( def get_chat_histories(
self, self,
appId: str, appId: str,

80
fastgpt_client/images.py Normal file
View File

@@ -0,0 +1,80 @@
"""Helpers for attaching images to FastGPT chat messages.
FastGPT is OpenAI-compatible and accepts two ways to send an image inside a
message's ``content`` list as an ``image_url`` part:
1. **Inline base64** — a ``data:<mime>;base64,<payload>`` data URL. Nothing is
   uploaded; the image travels inside the request body. Simplest to send (one
   request, no ``appId``/``chatId``), at the cost of a larger request body.
2. **Uploaded URL** — upload the bytes first (see
:meth:`ChatClient.upload_chat_image`) and reference the returned URL. Keeps
the request body small and lets the image be reused/previewed later.
These helpers build the ``image_url`` content parts for either method and let
you convert an inline data URL back into raw bytes (used when uploading).
"""
from __future__ import annotations
import base64
import binascii
import mimetypes
from pathlib import Path
from typing import Any
# Public names exported by ``from fastgpt_client.images import *``.
__all__ = [
    "encode_image_data_url",
    "is_data_url",
    "decode_data_url",
    "image_url_part",
    "image_part_from_bytes",
    "image_part_from_path",
]

# Scheme prefix used both to build data URLs and to recognise them.
_DATA_URL_PREFIX = "data:"
def encode_image_data_url(data: bytes, mime_type: str = "image/jpeg") -> str:
    """Encode raw image bytes as an inline ``data:<mime>;base64,<payload>`` URL.

    Args:
        data: Raw image bytes.
        mime_type: MIME type written into the URL header.

    Returns:
        The data URL string.
    """
    encoded = str(base64.b64encode(data), "ascii")
    return "{}{};base64,{}".format(_DATA_URL_PREFIX, mime_type, encoded)
def is_data_url(url: Any) -> bool:
    """Report whether ``url`` is a string starting with the ``data:`` prefix.

    Non-string values (including ``None``) are never data URLs. Only the
    prefix is checked; the payload is not validated here.
    """
    if not isinstance(url, str):
        return False
    return url.startswith(_DATA_URL_PREFIX)
def decode_data_url(data_url: str) -> tuple[str, bytes]:
    """Split a base64 data URL into ``(mime_type, raw_bytes)``.

    The expected shape is ``data:<mime>[;param=value...];base64,<payload>``.
    An empty MIME type falls back to ``"image/jpeg"``.

    Args:
        data_url: The data URL to decode.

    Returns:
        A ``(mime_type, raw_bytes)`` tuple.

    Raises:
        ValueError: if ``data_url`` is not a data URL, has no ``,`` separating
            header from payload, is not marked ``;base64``, or its base64
            payload is malformed.
    """
    if not is_data_url(data_url):
        raise ValueError("Not a base64 data URL")

    header, separator, payload = data_url.partition(",")
    mime_type, *params = header[len(_DATA_URL_PREFIX):].split(";")
    # Without the comma there is no payload, and without the ``base64`` marker
    # the payload is percent-encoded text; base64-decoding either would
    # silently yield empty or garbage bytes.
    is_base64 = any(param.strip().lower() == "base64" for param in params)
    if not separator or not is_base64:
        raise ValueError("Not a base64 data URL")

    mime_type = mime_type.strip() or "image/jpeg"
    try:
        raw = base64.b64decode(payload, validate=True)
    except (binascii.Error, ValueError) as exc:
        raise ValueError(f"Invalid base64 data URL payload: {exc}") from exc
    return mime_type, raw
def image_url_part(url: str) -> dict[str, Any]:
    """Wrap ``url`` (hosted or inline data URL) in an ``image_url`` content part.

    Returns:
        ``{"type": "image_url", "image_url": {"url": url}}``.
    """
    part: dict[str, Any] = {"type": "image_url"}
    part["image_url"] = {"url": url}
    return part
def image_part_from_bytes(data: bytes, mime_type: str = "image/jpeg") -> dict[str, Any]:
    """Turn raw image bytes into an inline base64 ``image_url`` content part.

    Args:
        data: Raw image bytes.
        mime_type: MIME type recorded in the generated data URL.

    Returns:
        An ``image_url`` content part whose URL is the encoded data URL.
    """
    inline_url = encode_image_data_url(data, mime_type)
    return image_url_part(inline_url)
def image_part_from_path(file_path: str | Path) -> dict[str, Any]:
    """Load a local image file into an inline base64 ``image_url`` part.

    The MIME type is guessed from the file name and falls back to
    ``"image/jpeg"`` when it cannot be determined.

    Args:
        file_path: Path to the image file to read.

    Returns:
        An ``image_url`` content part carrying the file as a data URL.
    """
    source = Path(file_path)
    guessed_type, _encoding = mimetypes.guess_type(source.name)
    return image_part_from_bytes(source.read_bytes(), guessed_type or "image/jpeg")

162
tests/test_image_inputs.py Normal file
View File

@@ -0,0 +1,162 @@
"""Tests for image-input helpers and the base64/upload handling modes."""
from unittest.mock import AsyncMock, Mock, patch
import httpx
import pytest
from fastgpt_client import images
from fastgpt_client.async_client import AsyncChatClient
from fastgpt_client.client import ChatClient
from fastgpt_client.exceptions import ValidationError
class TestImageHelpers:
    """Unit tests for the pure helpers in ``fastgpt_client.images``."""

    def test_encode_decode_round_trip(self):
        """Encoding then decoding yields the original MIME type and bytes."""
        payload, mime = b"hello", "image/png"
        encoded = images.encode_image_data_url(payload, mime)
        assert encoded.startswith("data:image/png;base64,")
        assert images.is_data_url(encoded)
        decoded_mime, decoded_bytes = images.decode_data_url(encoded)
        assert decoded_mime == mime
        assert decoded_bytes == payload

    def test_is_data_url_rejects_plain_url(self):
        """Hosted URLs and ``None`` are not data URLs."""
        for candidate in ("https://example.com/a.png", None):
            assert not images.is_data_url(candidate)

    def test_decode_rejects_non_data_url(self):
        """Decoding a hosted URL raises ``ValueError``."""
        plain_url = "https://example.com/a.png"
        with pytest.raises(ValueError):
            images.decode_data_url(plain_url)

    def test_decode_rejects_bad_base64(self):
        """A malformed base64 payload raises ``ValueError``."""
        malformed = "data:image/png;base64,!!!notbase64!!!"
        with pytest.raises(ValueError):
            images.decode_data_url(malformed)

    def test_image_url_part_shape(self):
        """The content part has the ``image_url`` shape."""
        expected = {"type": "image_url", "image_url": {"url": "u"}}
        assert images.image_url_part("u") == expected

    def test_image_part_from_path(self, tmp_path):
        """A ``.png`` file becomes an inline ``image/png`` data URL part."""
        image_file = tmp_path / "pic.png"
        image_file.write_bytes(b"\x89PNG\r\n")
        built = images.image_part_from_path(image_file)
        assert built["type"] == "image_url"
        assert built["image_url"]["url"].startswith("data:image/png;base64,")
def _data_url():
    """Return the JPEG-typed inline data URL used as the image fixture."""
    fixture_bytes = b"imgbytes"
    return images.encode_image_data_url(fixture_bytes, "image/jpeg")
class TestSyncImageInputMode:
    """``image_input_mode`` behaviour of the synchronous ``ChatClient``."""

    def test_base64_mode_passes_through(self, api_key):
        """Default mode sends the data URL unchanged and never uploads."""
        chat = ChatClient(api_key)
        inline_url = _data_url()
        payload = [{"role": "user", "content": [images.image_url_part(inline_url)]}]
        ok_response = Mock(spec=httpx.Response)
        ok_response.status_code = 200

        with patch.object(chat, "_send_request", return_value=ok_response) as send:
            with patch.object(chat, "upload_chat_image") as upload:
                chat.create_chat_completion(messages=payload)

        upload.assert_not_called()
        sent_messages = send.call_args[1]["json"]["messages"]
        assert sent_messages[0]["content"][0]["image_url"]["url"] == inline_url

    def test_upload_mode_replaces_data_url(self, api_key):
        """Upload mode swaps data URLs for hosted URLs and leaves the rest alone."""
        chat = ChatClient(api_key)
        inline_url = _data_url()
        parts = [
            {"type": "text", "text": "hi"},
            images.image_url_part(inline_url),
            images.image_url_part("https://keep/me.png"),
        ]
        payload = [{"role": "user", "content": parts}]
        ok_response = Mock(spec=httpx.Response)
        ok_response.status_code = 200
        upload_result = {"url": "https://cdn/up.jpg"}

        with patch.object(chat, "_send_request", return_value=ok_response) as send:
            with patch.object(
                chat, "upload_chat_image", return_value=upload_result
            ) as upload:
                chat.create_chat_completion(
                    messages=payload,
                    chatId="chat-1",
                    appId="app-1",
                    image_input_mode="upload",
                )

        upload.assert_called_once()
        sent_content = send.call_args[1]["json"]["messages"][0]["content"]
        assert sent_content[1]["image_url"]["url"] == "https://cdn/up.jpg"
        # plain URL part untouched
        assert sent_content[2]["image_url"]["url"] == "https://keep/me.png"
        # original messages not mutated
        assert payload[0]["content"][1]["image_url"]["url"] == inline_url

    def test_upload_mode_requires_app_and_chat_id(self, api_key):
        """Upload mode without ``appId``/``chatId`` is rejected."""
        chat = ChatClient(api_key)
        payload = [{"role": "user", "content": [images.image_url_part(_data_url())]}]
        with pytest.raises(ValidationError):
            chat.create_chat_completion(messages=payload, image_input_mode="upload")

    def test_invalid_mode_raises(self, api_key):
        """An unknown ``image_input_mode`` is rejected."""
        chat = ChatClient(api_key)
        payload = [{"role": "user", "content": "hi"}]
        with pytest.raises(ValidationError):
            chat.create_chat_completion(messages=payload, image_input_mode="nope")

    def test_upload_failure_falls_back_to_base64(self, api_key):
        """A failing upload leaves the inline data URL in the request."""
        chat = ChatClient(api_key)
        inline_url = _data_url()
        payload = [{"role": "user", "content": [images.image_url_part(inline_url)]}]
        ok_response = Mock(spec=httpx.Response)
        ok_response.status_code = 200

        with patch.object(chat, "_send_request", return_value=ok_response) as send:
            with patch.object(
                chat, "upload_chat_image", side_effect=RuntimeError("boom")
            ):
                chat.create_chat_completion(
                    messages=payload, chatId="c", appId="a", image_input_mode="upload"
                )

        sent_messages = send.call_args[1]["json"]["messages"]
        assert sent_messages[0]["content"][0]["image_url"]["url"] == inline_url
class TestAsyncImageInputMode:
    """``image_input_mode`` behaviour of ``AsyncChatClient``.

    Each test closes its client in a ``finally`` clause so a failing
    assertion does not leave the client open.
    """

    @pytest.mark.asyncio
    async def test_upload_mode_replaces_data_url(self, api_key):
        """Upload mode swaps the inline data URL for the hosted URL."""
        client = AsyncChatClient(api_key)
        try:
            url = _data_url()
            messages = [{"role": "user", "content": [images.image_url_part(url)]}]
            mock_response = Mock(spec=httpx.Response)
            mock_response.status_code = 200

            with patch.object(
                client, "_send_request", new=AsyncMock(return_value=mock_response)
            ) as send, patch.object(
                client,
                "upload_chat_image",
                new=AsyncMock(return_value={"url": "https://cdn/u.jpg"}),
            ):
                await client.create_chat_completion(
                    messages=messages, chatId="c", appId="a", image_input_mode="upload"
                )

            sent = send.call_args[1]["json"]["messages"]
            assert sent[0]["content"][0]["image_url"]["url"] == "https://cdn/u.jpg"
        finally:
            await client.close()

    @pytest.mark.asyncio
    async def test_upload_mode_requires_app_and_chat_id(self, api_key):
        """Upload mode without ``appId``/``chatId`` is rejected."""
        client = AsyncChatClient(api_key)
        try:
            messages = [
                {"role": "user", "content": [images.image_url_part(_data_url())]}
            ]
            with pytest.raises(ValidationError):
                await client.create_chat_completion(
                    messages=messages, image_input_mode="upload"
                )
        finally:
            await client.close()