Add chat image upload support

This commit is contained in:
Xin Wang
2026-06-01 13:39:22 +08:00
parent a55ca37c39
commit 96d685da91
7 changed files with 951 additions and 4 deletions

View File

@@ -7,6 +7,7 @@ Run from the examples directory with .env configured:
This example provides:
- a full-screen Textual interface
- streaming chat updates
- image turns with /image <path> [prompt]
- workflow / tool event logging
- modal handling for FastGPT interactive nodes
"""
@@ -15,6 +16,7 @@ from __future__ import annotations
import argparse
import json
import shlex
import sys
import uuid
from pathlib import Path
@@ -44,6 +46,42 @@ from chat_cli import (
from fastgpt_client import ChatClient, FastGPTInteractiveEvent, iter_stream_events
# Prompt sent with an image when the user supplies a path but no text.
DEFAULT_IMAGE_PROMPT = "Please describe this image."
# Lower-case file suffixes accepted by `/image`.
IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp"}


def parse_image_command(content: str) -> Optional[tuple[Path, str]]:
    """Parse `/image <path> [prompt]`, including terminal-dragged quoted paths.

    Args:
        content: Raw composer text.

    Returns:
        ``None`` when *content* is not an ``/image`` command; otherwise a
        ``(path, prompt)`` tuple where *prompt* falls back to
        ``DEFAULT_IMAGE_PROMPT`` if the user gave none.

    Raises:
        ValueError: If the command has no path, the path cannot be parsed,
            the file is missing / not a regular file, or its suffix is not
            in ``IMAGE_EXTENSIONS``.
    """
    # Split on any whitespace so a tab or newline after "/image" is still
    # recognised as the command separator (not only a literal space).
    pieces = content.strip().split(None, 1)
    if not pieces or pieces[0] != "/image":
        return None
    rest = pieces[1] if len(pieces) > 1 else ""
    if not rest.strip():
        raise ValueError("Usage: /image <path> [prompt]")

    # NOTE(review): POSIX-mode shlex treats backslashes as escapes, so
    # unquoted Windows-style paths lose their separators — confirm whether
    # Windows terminals need to be supported here.
    try:
        parts = shlex.split(rest)
    except ValueError as exc:
        # An unbalanced quote is most often an apostrophe in the free-text
        # prompt ("what's this?"). Only the path needs shell-style parsing,
        # so lex just the first token and keep the remainder verbatim.
        lexer = shlex.shlex(rest, posix=True)
        lexer.whitespace_split = True
        lexer.commenters = ""
        try:
            first = lexer.get_token()
        except ValueError:
            # The broken quoting is in the path itself: report the original error.
            raise ValueError(f"Could not parse image command: {exc}") from exc
        parts = [] if first is None else [first, lexer.instream.read()]
    if not parts:
        raise ValueError("Usage: /image <path> [prompt]")

    path = Path(parts[0]).expanduser()
    if not path.exists():
        raise ValueError(f"Image file not found: {path}")
    if not path.is_file():
        raise ValueError(f"Image path is not a file: {path}")
    if path.suffix.lower() not in IMAGE_EXTENSIONS:
        raise ValueError(f"Unsupported image extension: {path.suffix or '(none)'}")

    prompt = " ".join(parts[1:]).strip() or DEFAULT_IMAGE_PROMPT
    return path, prompt
class MessageCard(Static):
"""Lightweight message block used in the transcript pane."""
@@ -435,7 +473,7 @@ class FastGPTWorkbench(App[None]):
yield Static("FastGPT Workbench", id="brand")
yield Static("", id="session_panel", classes="panel")
yield Static("", id="status_panel", classes="panel")
yield Static("Ctrl+J send\nCtrl+N new chat\nEsc closes modal prompts", classes="panel")
yield Static("Ctrl+J send\nCtrl+N new chat\n/image <path> [prompt]", classes="panel")
yield RichLog(id="event_log", wrap=True, highlight=False, markup=False)
with Vertical(id="main_panel"):
yield Static("Claude-style FastGPT Console", id="chat_title")
@@ -486,7 +524,7 @@ class FastGPTWorkbench(App[None]):
return content
def _default_session_message(self) -> str:
return "Start typing below. FastGPT workflow events will appear in the left rail."
return "Start typing below. Use /image <path> [prompt] to attach a local image."
def _initial_session_message(self) -> str:
if not APP_ID:
@@ -517,7 +555,14 @@ class FastGPTWorkbench(App[None]):
self.query_one(f"#{card_id}", MessageCard).set_text("Thinking…")
return card_id
def _start_turn(self, content: str, *, title: str = "You", role: str = "user") -> None:
def _start_turn(
self,
content: str,
*,
title: str = "You",
role: str = "user",
messages: Optional[List[Dict[str, Any]]] = None,
) -> None:
if self._busy:
self._log_event("[local] Busy streaming. Wait for the current turn to finish.")
return
@@ -528,7 +573,27 @@ class FastGPTWorkbench(App[None]):
self._busy = True
self._set_status("Streaming", "Receiving FastGPT output")
self._stream_turn(
messages=[{"role": "user", "content": content}],
messages=messages or [{"role": "user", "content": content}],
assistant_card_id=assistant_card_id,
)
def _start_image_turn(self, image_path: Path, prompt: str) -> None:
if self._busy:
self._log_event("[local] Busy streaming. Wait for the current turn to finish.")
return
if not APP_ID:
self._log_event("[local] APP_ID is required for image upload.")
self._set_status("Error", "APP_ID is required for image upload")
return
display_content = f"{prompt}\n\n[image] {image_path}"
self._append_message(role="user", title="You", content=display_content)
assistant_card_id = self._assistant_card()
self._busy = True
self._set_status("Uploading", image_path.name)
self._stream_image_turn(
image_path=image_path,
prompt=prompt,
assistant_card_id=assistant_card_id,
)
@@ -580,8 +645,19 @@ class FastGPTWorkbench(App[None]):
content = composer.text.strip()
if not content:
return
try:
image_command = parse_image_command(content)
except ValueError as exc:
self._log_event(f"[local] {exc}")
self._set_status("Error", str(exc))
return
composer.text = ""
composer.focus()
if image_command is not None:
image_path, prompt = image_command
self._start_image_turn(image_path, prompt)
return
self._start_turn(content)
def action_new_chat(self) -> None:
@@ -687,6 +763,104 @@ class FastGPTWorkbench(App[None]):
waiting_interactive=interactive_event is not None,
)
@work(thread=True, exclusive=True)
def _stream_image_turn(self, image_path: Path, prompt: str, assistant_card_id: str) -> None:
    """Upload an image, then stream the FastGPT reply into the assistant card.

    Declared as a thread worker via ``@work(thread=True, exclusive=True)``;
    every UI-facing call is routed through ``self.call_from_thread``.

    Args:
        image_path: Local image file to upload.
        prompt: Text sent together with the uploaded image.
        assistant_card_id: Id of the transcript card that receives the
            streamed answer chunks.

    Any exception (upload failure, HTTP error, stream ``error`` event) marks
    the turn as failed with the exception text; otherwise the turn is
    completed with ``waiting_interactive=False``.
    """
    try:
        with ChatClient(api_key=API_KEY, base_url=BASE_URL) as client:
            self.call_from_thread(self._log_event, f"[image] uploading: {image_path}")
            image = client.upload_chat_image(
                appId=APP_ID,
                chatId=self.chat_id,
                file_path=image_path,
            )
            image_url = image.get("url")
            if not image_url:
                raise RuntimeError("FastGPT did not return an image preview URL")

            self.call_from_thread(self._log_event, f"[image] uploaded: {image_path.name}")
            self.call_from_thread(self._set_status, "Streaming", "Receiving FastGPT output")

            # Multi-part user message: the text prompt plus the uploaded image URL.
            messages = [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {"url": image_url}},
                    ],
                }
            ]
            response = client.create_chat_completion(
                messages=messages,
                stream=True,
                detail=True,
                chatId=self.chat_id,
            )
            try:
                # Checked inside the try so the response is closed even when
                # the server answers with an HTTP error status.
                response.raise_for_status()
                for event in iter_stream_events(response):
                    if event.kind in {"data", "answer", "fastAnswer"}:
                        content = _extract_text_from_event(event.kind, event.data)
                        if content:
                            self.call_from_thread(self._append_assistant_chunk, assistant_card_id, content)
                        continue

                    if event.kind == "flowNodeStatus":
                        if isinstance(event.data, dict):
                            status = str(event.data.get("status") or "?")
                            node_name = str(
                                event.data.get("nodeName")
                                or event.data.get("name")
                                or event.data.get("node_id")
                                or "Unknown node"
                            )
                            self.call_from_thread(self._log_event, f"[flow] {status}: {node_name}")
                        else:
                            self.call_from_thread(self._log_event, f"[flow] {event.data}")
                        continue

                    if event.kind == "flowResponses":
                        if isinstance(event.data, dict):
                            module_name = str(
                                event.data.get("moduleName") or event.data.get("nodeName") or "Unknown module"
                            )
                            self.call_from_thread(self._log_event, f"[flow] response from: {module_name}")
                        elif isinstance(event.data, list):
                            self.call_from_thread(
                                self._log_event,
                                f"[flow] response details: {len(event.data)} module record(s)",
                            )
                        else:
                            self.call_from_thread(self._log_event, f"[flow] response details: {event.data}")
                        continue

                    if event.kind == "toolCall":
                        tool_name = _tool_name_from_event(event.data)
                        self.call_from_thread(self._log_event, f"[tool] calling: {tool_name}")
                        continue

                    if event.kind == "toolParams":
                        self.call_from_thread(self._log_event, f"[tool] params: {event.data}")
                        continue

                    if event.kind == "toolResponse":
                        self.call_from_thread(self._log_event, f"[tool] response: {event.data}")
                        continue

                    if event.kind == "updateVariables":
                        self.call_from_thread(self._log_event, f"[vars] updated: {event.data}")
                        continue

                    if event.kind == "interactive":
                        # Interactive workflow prompts are not handled on image
                        # turns: log it and stop reading the stream.
                        self.call_from_thread(
                            self._log_event,
                            "[interactive] Image turns do not support workflow prompts yet.",
                        )
                        break

                    if event.kind == "error":
                        # Guard the payload type like the other branches so a
                        # non-dict error still surfaces its own text instead of
                        # an AttributeError message.
                        payload = event.data
                        if isinstance(payload, dict):
                            message = str(
                                payload.get("message") or payload.get("error") or "Unknown FastGPT error"
                            )
                        else:
                            message = str(payload or "Unknown FastGPT error")
                        raise RuntimeError(message)

                    if event.kind == "done":
                        break
            finally:
                response.close()
    except Exception as exc:
        # Worker boundary: report any failure on the assistant card.
        self.call_from_thread(self._mark_turn_failed, assistant_card_id, str(exc))
        return

    self.call_from_thread(self._complete_turn, assistant_card_id, waiting_interactive=False)
def _parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description="Textual FastGPT chat workbench")