Add chat image upload support

2026-06-01 13:39:22 +08:00
parent a55ca37c39
commit 96d685da91
7 changed files with 951 additions and 4 deletions
--- a/examples/chat_tui.py
+++ b/examples/chat_tui.py
@@ -7,6 +7,7 @@ Run from the examples directory with .env configured:
 This example provides:
 - a full-screen Textual interface
 - streaming chat updates
+- image turns with /image <path> [prompt]
 - workflow / tool event logging
 - modal handling for FastGPT interactive nodes
 """
@@ -15,6 +16,7 @@ from __future__ import annotations

 import argparse
 import json
+import shlex
 import sys
 import uuid
 from pathlib import Path
@@ -44,6 +46,42 @@ from chat_cli import (
 from fastgpt_client import ChatClient, FastGPTInteractiveEvent, iter_stream_events


+# Prompt sent with an image when `/image` is given a path but no prompt text.
+DEFAULT_IMAGE_PROMPT = "Please describe this image."
+# File suffixes accepted by `/image`; the candidate suffix is lower-cased before lookup.
+IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp"}
+
+
+def parse_image_command(content: str) -> Optional[tuple[Path, str]]:
+    """Parse a `/image <path> [prompt]` composer command.
+
+    The argument string is tokenized with ``shlex.split`` so quoted or
+    backslash-escaped paths (as produced when a file is dragged into a
+    terminal) are handled. The first token is the path; any remaining tokens
+    are joined with single spaces to form the prompt.
+
+    Args:
+        content: Raw composer text.
+
+    Returns:
+        ``(path, prompt)`` when *content* is an image command, with *prompt*
+        falling back to ``DEFAULT_IMAGE_PROMPT`` when none was given, or
+        ``None`` when *content* is not an image command at all.
+
+    Raises:
+        ValueError: If the command has no path, cannot be tokenized, or the
+            path does not exist, is not a file, or has an unsupported suffix.
+    """
+    # Split on any whitespace run (not only a literal space) so that
+    # `/image<TAB>path` or `/image<newline>path` is still recognized as the
+    # command instead of silently being sent as ordinary chat text.
+    # `/imagefoo` remains plain text because the first word must match exactly.
+    words = content.strip().split(maxsplit=1)
+    if not words or words[0] != "/image":
+        return None
+    if len(words) < 2:
+        raise ValueError("Usage: /image <path> [prompt]")
+    rest = words[1]
+
+    # NOTE(review): shlex splits with POSIX rules by default, so unquoted
+    # backslashes are consumed as escapes; an unquoted Windows path would need
+    # quoting. Confirm whether Windows terminals are a target.
+    try:
+        parts = shlex.split(rest)
+    except ValueError as exc:
+        raise ValueError(f"Could not parse image command: {exc}") from exc
+
+    if not parts:
+        raise ValueError("Usage: /image <path> [prompt]")
+
+    path = Path(parts[0]).expanduser()
+    if not path.exists():
+        raise ValueError(f"Image file not found: {path}")
+    if not path.is_file():
+        raise ValueError(f"Image path is not a file: {path}")
+    if path.suffix.lower() not in IMAGE_EXTENSIONS:
+        raise ValueError(f"Unsupported image extension: {path.suffix or '(none)'}")
+
+    prompt = " ".join(parts[1:]).strip() or DEFAULT_IMAGE_PROMPT
+    return path, prompt
+
+
 class MessageCard(Static):
    """Lightweight message block used in the transcript pane."""

@@ -435,7 +473,7 @@ class FastGPTWorkbench(App[None]):
                yield Static("FastGPT Workbench", id="brand")
                yield Static("", id="session_panel", classes="panel")
                yield Static("", id="status_panel", classes="panel")
-                yield Static("Ctrl+J send\nCtrl+N new chat\nEsc closes modal prompts", classes="panel")
+                yield Static("Ctrl+J send\nCtrl+N new chat\n/image <path> [prompt]", classes="panel")
                yield RichLog(id="event_log", wrap=True, highlight=False, markup=False)
            with Vertical(id="main_panel"):
                yield Static("Claude-style FastGPT Console", id="chat_title")
@@ -486,7 +524,7 @@ class FastGPTWorkbench(App[None]):
            return content

    def _default_session_message(self) -> str:
-        return "Start typing below. FastGPT workflow events will appear in the left rail."
+        return "Start typing below. Use /image <path> [prompt] to attach a local image."

    def _initial_session_message(self) -> str:
        if not APP_ID:
@@ -517,7 +555,14 @@ class FastGPTWorkbench(App[None]):
        self.query_one(f"#{card_id}", MessageCard).set_text("Thinking…")
        return card_id

-    def _start_turn(self, content: str, *, title: str = "You", role: str = "user") -> None:
+    def _start_turn(
+        self,
+        content: str,
+        *,
+        title: str = "You",
+        role: str = "user",
+        messages: Optional[List[Dict[str, Any]]] = None,
+    ) -> None:
        if self._busy:
            self._log_event("[local] Busy streaming. Wait for the current turn to finish.")
            return
@@ -528,7 +573,27 @@ class FastGPTWorkbench(App[None]):
        self._busy = True
        self._set_status("Streaming", "Receiving FastGPT output")
        self._stream_turn(
-            messages=[{"role": "user", "content": content}],
+            messages=messages or [{"role": "user", "content": content}],
+            assistant_card_id=assistant_card_id,
+        )
+
+    def _start_image_turn(self, image_path: Path, prompt: str) -> None:
+        """Begin a chat turn that sends a local image together with a prompt.
+
+        Does nothing beyond logging when a turn is already streaming, and
+        reports an error when ``APP_ID`` is not configured. Otherwise it adds
+        the user message and an assistant placeholder card to the transcript,
+        marks the app busy, and hands the upload and streaming work to
+        ``_stream_image_turn``.
+
+        Args:
+            image_path: Local image file to upload.
+            prompt: Text sent with the image.
+        """
+        if self._busy:
+            self._log_event("[local] Busy streaming. Wait for the current turn to finish.")
+            return
+        if not APP_ID:
+            self._log_event("[local] APP_ID is required for image upload.")
+            self._set_status("Error", "APP_ID is required for image upload")
+            return
+
+        # The transcript shows the prompt followed by the local path of the attached image.
+        display_content = f"{prompt}\n\n[image] {image_path}"
+        self._append_message(role="user", title="You", content=display_content)
+        assistant_card_id = self._assistant_card()
+        self._busy = True
+        self._set_status("Uploading", image_path.name)
+        self._stream_image_turn(
+            image_path=image_path,
+            prompt=prompt,
             assistant_card_id=assistant_card_id,
         )

@@ -580,8 +645,19 @@ class FastGPTWorkbench(App[None]):
        content = composer.text.strip()
        if not content:
            return
+        try:
+            image_command = parse_image_command(content)
+        except ValueError as exc:
+            self._log_event(f"[local] {exc}")
+            self._set_status("Error", str(exc))
+            return
+
        composer.text = ""
        composer.focus()
+        if image_command is not None:
+            image_path, prompt = image_command
+            self._start_image_turn(image_path, prompt)
+            return
        self._start_turn(content)

    def action_new_chat(self) -> None:
@@ -687,6 +763,104 @@ class FastGPTWorkbench(App[None]):
            waiting_interactive=interactive_event is not None,
        )

+    @work(thread=True, exclusive=True)
+    def _stream_image_turn(self, image_path: Path, prompt: str, assistant_card_id: str) -> None:
+        """Upload an image, send it with *prompt*, and stream the reply into the UI.
+
+        Declared as a threaded worker, so every UI update is marshalled through
+        ``call_from_thread``. Text events are appended to the assistant card;
+        workflow, tool and variable events are written to the event log. An
+        ``interactive`` event is logged and ends the stream because image turns
+        do not handle workflow prompts. Any exception marks the turn as failed
+        with the exception text; otherwise the turn is completed.
+
+        Args:
+            image_path: Local image file to upload.
+            prompt: Text sent together with the uploaded image.
+            assistant_card_id: Id of the transcript card that receives the reply.
+        """
+        try:
+            with ChatClient(api_key=API_KEY, base_url=BASE_URL) as client:
+                self.call_from_thread(self._log_event, f"[image] uploading: {image_path}")
+                image = client.upload_chat_image(
+                    appId=APP_ID,
+                    chatId=self.chat_id,
+                    file_path=image_path,
+                )
+                image_url = image.get("url")
+                if not image_url:
+                    raise RuntimeError("FastGPT did not return an image preview URL")
+
+                self.call_from_thread(self._log_event, f"[image] uploaded: {image_path.name}")
+                self.call_from_thread(self._set_status, "Streaming", "Receiving FastGPT output")
+
+                # A single user message whose content carries both the text and the image URL.
+                messages = [
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": prompt},
+                            {"type": "image_url", "image_url": {"url": image_url}},
+                        ],
+                    }
+                ]
+                response = client.create_chat_completion(
+                    messages=messages,
+                    stream=True,
+                    detail=True,
+                    chatId=self.chat_id,
+                )
+                response.raise_for_status()
+                try:
+                    for event in iter_stream_events(response):
+                        if event.kind in {"data", "answer", "fastAnswer"}:
+                            content = _extract_text_from_event(event.kind, event.data)
+                            if content:
+                                self.call_from_thread(self._append_assistant_chunk, assistant_card_id, content)
+                            continue
+
+                        if event.kind == "flowNodeStatus":
+                            if isinstance(event.data, dict):
+                                status = str(event.data.get("status") or "?")
+                                node_name = str(event.data.get("nodeName") or event.data.get("name") or event.data.get("node_id") or "Unknown node")
+                                self.call_from_thread(self._log_event, f"[flow] {status}: {node_name}")
+                            else:
+                                self.call_from_thread(self._log_event, f"[flow] {event.data}")
+                            continue
+
+                        if event.kind == "flowResponses":
+                            if isinstance(event.data, dict):
+                                module_name = str(event.data.get("moduleName") or event.data.get("nodeName") or "Unknown module")
+                                self.call_from_thread(self._log_event, f"[flow] response from: {module_name}")
+                            elif isinstance(event.data, list):
+                                self.call_from_thread(self._log_event, f"[flow] response details: {len(event.data)} module record(s)")
+                            else:
+                                self.call_from_thread(self._log_event, f"[flow] response details: {event.data}")
+                            continue
+
+                        if event.kind == "toolCall":
+                            tool_name = _tool_name_from_event(event.data)
+                            self.call_from_thread(self._log_event, f"[tool] calling: {tool_name}")
+                            continue
+
+                        if event.kind == "toolParams":
+                            self.call_from_thread(self._log_event, f"[tool] params: {event.data}")
+                            continue
+
+                        if event.kind == "toolResponse":
+                            self.call_from_thread(self._log_event, f"[tool] response: {event.data}")
+                            continue
+
+                        if event.kind == "updateVariables":
+                            self.call_from_thread(self._log_event, f"[vars] updated: {event.data}")
+                            continue
+
+                        if event.kind == "interactive":
+                            self.call_from_thread(
+                                self._log_event,
+                                "[interactive] Image turns do not support workflow prompts yet.",
+                            )
+                            break
+
+                        if event.kind == "error":
+                            # Guard the payload type like the other branches do: a
+                            # non-dict error payload would otherwise raise
+                            # AttributeError and hide the real error text.
+                            if isinstance(event.data, dict):
+                                message = str(event.data.get("message") or event.data.get("error") or "Unknown FastGPT error")
+                            else:
+                                message = str(event.data or "Unknown FastGPT error")
+                            raise RuntimeError(message)
+
+                        if event.kind == "done":
+                            break
+                finally:
+                    # Always release the streaming response, including on error or early break.
+                    response.close()
+        except Exception as exc:
+            # Worker boundary: surface any failure on the assistant card instead of crashing the worker.
+            self.call_from_thread(self._mark_turn_failed, assistant_card_id, str(exc))
+            return
+
+        self.call_from_thread(self._complete_turn, assistant_card_id, waiting_interactive=False)
+

 def _parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Textual FastGPT chat workbench")