Add image upload in conversation
This commit is contained in:
@@ -70,7 +70,8 @@
|
||||
"app_id": "6a153aed53e3f8d9f2744905",
|
||||
"variables": {},
|
||||
"detail": false,
|
||||
"timeout_sec": 60.0
|
||||
"timeout_sec": 60.0,
|
||||
"image_input_mode": "base64"
|
||||
},
|
||||
"tts": {
|
||||
"provider": "xfyun",
|
||||
|
||||
@@ -148,6 +148,8 @@ class LLMConfig:
|
||||
variables: dict[str, str] = field(default_factory=dict)
|
||||
detail: bool = False
|
||||
timeout_sec: float = 60.0
|
||||
# FastGPT image input mode: "base64" (inline data URL) or "upload" (presigned upload).
|
||||
image_input_mode: str = "base64"
|
||||
|
||||
@property
|
||||
def is_fastgpt(self) -> bool:
|
||||
@@ -257,6 +259,15 @@ def config_from_dict(data: dict) -> EngineConfig:
|
||||
llm["app_id"] = None
|
||||
if not isinstance(llm.get("variables"), dict):
|
||||
llm["variables"] = {}
|
||||
image_input_mode = str(
|
||||
llm.get("image_input_mode", LLMConfig().image_input_mode)
|
||||
).strip().lower()
|
||||
if image_input_mode not in {"base64", "upload"}:
|
||||
raise ValueError(
|
||||
"services.llm.image_input_mode must be 'base64' or 'upload', "
|
||||
f"got {llm.get('image_input_mode')!r}"
|
||||
)
|
||||
llm["image_input_mode"] = image_input_mode
|
||||
if agent.get("greeting_mode") == "fastgpt_opener" and llm["provider"] != "fastgpt":
|
||||
raise ValueError(
|
||||
"agent.greeting_mode='fastgpt_opener' requires services.llm.provider='fastgpt'"
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import binascii
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
@@ -73,6 +77,50 @@ def _message_text(message: dict[str, Any]) -> str:
|
||||
return ""
|
||||
|
||||
|
||||
IMAGE_INPUT_MODE_BASE64 = "base64"
|
||||
IMAGE_INPUT_MODE_UPLOAD = "upload"
|
||||
SUPPORTED_IMAGE_INPUT_MODES = frozenset({IMAGE_INPUT_MODE_BASE64, IMAGE_INPUT_MODE_UPLOAD})
|
||||
|
||||
_MIME_TO_EXT = {
|
||||
"image/jpeg": ".jpg",
|
||||
"image/png": ".png",
|
||||
"image/webp": ".webp",
|
||||
}
|
||||
|
||||
|
||||
def _message_has_image(message: dict[str, Any]) -> bool:
|
||||
content = message.get("content")
|
||||
if not isinstance(content, list):
|
||||
return False
|
||||
return any(
|
||||
isinstance(part, dict) and part.get("type") == "image_url"
|
||||
for part in content
|
||||
)
|
||||
|
||||
|
||||
def _redact_messages_for_log(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
"""Replace base64 image data URLs with a short placeholder for logging."""
|
||||
redacted: list[dict[str, Any]] = []
|
||||
for message in messages:
|
||||
content = message.get("content")
|
||||
if not isinstance(content, list):
|
||||
redacted.append(message)
|
||||
continue
|
||||
parts: list[Any] = []
|
||||
for part in content:
|
||||
if (
|
||||
isinstance(part, dict)
|
||||
and part.get("type") == "image_url"
|
||||
and isinstance(part.get("image_url"), dict)
|
||||
):
|
||||
url = str(part["image_url"].get("url") or "")
|
||||
parts.append({"type": "image_url", "image_url": {"url": f"<{len(url)} chars>"}})
|
||||
else:
|
||||
parts.append(part)
|
||||
redacted.append({**message, "content": parts})
|
||||
return redacted
|
||||
|
||||
|
||||
def _first_nonempty_text(*values: Any) -> str:
|
||||
for value in values:
|
||||
if isinstance(value, str):
|
||||
@@ -172,6 +220,7 @@ class FastGPTLLMService(LLMService):
|
||||
app_id: str | None = None,
|
||||
greeting_prompt: str | None = None,
|
||||
timeout: float = 60.0,
|
||||
image_input_mode: str = IMAGE_INPUT_MODE_BASE64,
|
||||
settings: FastGPTLLMSettings | None = None,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
@@ -183,6 +232,20 @@ class FastGPTLLMService(LLMService):
|
||||
self._chat_id = chat_id or f"voice_{uuid.uuid4().hex[:16]}"
|
||||
self._app_id = (app_id or "").strip()
|
||||
self._greeting_prompt = (greeting_prompt or "你好").strip() or "你好"
|
||||
|
||||
mode = (image_input_mode or IMAGE_INPUT_MODE_BASE64).strip().lower()
|
||||
if mode not in SUPPORTED_IMAGE_INPUT_MODES:
|
||||
raise ValueError(
|
||||
f"Unsupported image_input_mode {image_input_mode!r}; "
|
||||
f"expected one of {sorted(SUPPORTED_IMAGE_INPUT_MODES)}"
|
||||
)
|
||||
if mode == IMAGE_INPUT_MODE_UPLOAD and not self._app_id:
|
||||
logger.warning(
|
||||
"FastGPT image_input_mode='upload' requires app_id; "
|
||||
"falling back to inline base64"
|
||||
)
|
||||
mode = IMAGE_INPUT_MODE_BASE64
|
||||
self._image_input_mode = mode
|
||||
self._client = AsyncChatClient(
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
@@ -310,26 +373,114 @@ class FastGPTLLMService(LLMService):
|
||||
if response is not None:
|
||||
await response.aclose()
|
||||
|
||||
def _build_fastgpt_messages(self, context: LLMContext) -> list[dict[str, Any]]:
    """Build the single-message payload for the current turn.

    The context is scanned from newest to oldest and the first usable user
    message wins:

    * a message containing an ``image_url`` part is forwarded with its
      OpenAI-style content list unchanged (text parts plus image parts);
    * otherwise a message with non-empty text is forwarded as plain text.

    Non-dict entries, non-user roles, and user messages with neither image
    nor text are skipped. When nothing qualifies, the configured greeting
    prompt is sent instead.

    Returns:
        A one-element list holding the user message to send.
    """
    for message in reversed(context.get_messages()):
        if not isinstance(message, dict) or message.get("role") != "user":
            continue
        if _message_has_image(message):
            # Multimodal turn: forward the OpenAI-style content list as-is
            # (text parts + image_url with a base64 data URL). FastGPT's
            # /chat/completions accepts this directly.
            return [{"role": "user", "content": message["content"]}]
        text = _message_text(message)
        if text:
            return [{"role": "user", "content": text}]

    return [{"role": "user", "content": self._greeting_prompt}]
|
||||
|
||||
async def _resolve_image_inputs(
    self, messages: list[dict[str, Any]]
) -> list[dict[str, Any]]:
    """In ``upload`` mode, replace inline base64 image data URLs with uploaded URLs.

    In any other mode the input list is returned untouched (inline data URLs).
    In ``upload`` mode, new message and content objects are built for every
    list-content message, so the objects passed in are never mutated.

    Args:
        messages: Messages whose ``content`` is either a plain value or an
            OpenAI-style list of parts.

    Returns:
        The messages to send. Each ``image_url`` part whose URL starts with
        ``data:image/`` is replaced by a part carrying whatever
        ``_upload_data_url`` returned for it.
    """
    if self._image_input_mode != IMAGE_INPUT_MODE_UPLOAD:
        return messages

    resolved: list[dict[str, Any]] = []
    for message in messages:
        content = message.get("content")
        if not isinstance(content, list):
            resolved.append(message)
            continue

        new_content: list[Any] = []
        for part in content:
            url = None
            if isinstance(part, dict) and part.get("type") == "image_url":
                image = part.get("image_url")
                # A malformed part (``image_url`` set to None or a bare string)
                # is forwarded unchanged instead of raising AttributeError.
                if isinstance(image, dict):
                    url = image.get("url")
            if isinstance(url, str) and url.startswith("data:image/"):
                uploaded = await self._upload_data_url(url)
                new_content.append(
                    {"type": "image_url", "image_url": {"url": uploaded}}
                )
            else:
                new_content.append(part)
        resolved.append({**message, "content": new_content})

    return resolved
|
||||
|
||||
async def _upload_data_url(self, data_url: str) -> str:
    """Upload a ``data:image/...;base64,...`` URL via FastGPT and return its URL.

    The payload is decoded, written to a temporary file, and handed to the
    client's ``upload_chat_image``. The temporary file is always removed.

    Falls back to the original data URL if the payload is invalid or empty,
    or if the upload fails or yields no URL, so the turn still proceeds with
    inline base64.

    Args:
        data_url: The inline image data URL to upload.

    Returns:
        The uploaded image URL on success, otherwise ``data_url`` unchanged.
    """
    header, _, payload = data_url.partition(",")
    # MIME types are case-insensitive; normalise so the extension lookup hits.
    mime_type = (
        header[len("data:") :].split(";", 1)[0].strip().lower() or "image/jpeg"
    )
    try:
        raw = base64.b64decode(payload, validate=True)
    except (binascii.Error, ValueError) as exc:
        logger.warning(f"FastGPT image upload skipped; invalid base64: {exc}")
        return data_url
    if not raw:
        # An empty payload would otherwise upload a zero-byte file.
        logger.warning("FastGPT image upload skipped; empty image payload")
        return data_url

    suffix = _MIME_TO_EXT.get(mime_type, ".jpg")
    tmp_path: str | None = None
    try:
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
            # Record the path before writing so a failed write is still cleaned up.
            tmp_path = tmp.name
            tmp.write(raw)
        result = await self._client.upload_chat_image(
            appId=self._app_id,
            chatId=self._chat_id,
            file_path=tmp_path,
        )
        url = result.get("url") if isinstance(result, dict) else None
        if isinstance(url, str) and url:
            logger.info(
                f"FastGPT image uploaded chatId={self._chat_id} "
                f"bytes={len(raw)} url={url}"
            )
            return url
        logger.warning("FastGPT image upload returned no url; using inline base64")
        return data_url
    except Exception as exc:
        # Deliberate best-effort: any upload failure degrades to inline base64.
        logger.warning(f"FastGPT image upload failed; using inline base64: {exc}")
        return data_url
    finally:
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
|
||||
|
||||
async def _process_context(self, context: LLMContext) -> None:
|
||||
messages = self._build_fastgpt_messages(context)
|
||||
messages = await self._resolve_image_inputs(messages)
|
||||
variables = self._settings.variables or None
|
||||
|
||||
logger.info(
|
||||
"FastGPT chat completion "
|
||||
f"chatId={self._chat_id} appId={self._app_id or '-'} "
|
||||
f"variables={sorted((variables or {}).keys())} messages={messages!r}"
|
||||
f"variables={sorted((variables or {}).keys())} "
|
||||
f"messages={_redact_messages_for_log(messages)!r}"
|
||||
)
|
||||
|
||||
await self.start_ttfb_metrics()
|
||||
|
||||
@@ -65,6 +65,7 @@ def create_llm_service(
|
||||
app_id=config.app_id,
|
||||
greeting_prompt=greeting_prompt,
|
||||
timeout=config.timeout_sec,
|
||||
image_input_mode=config.image_input_mode,
|
||||
settings=FastGPTLLMSettings(
|
||||
model=config.model or "fastgpt",
|
||||
variables=variables,
|
||||
|
||||
@@ -24,6 +24,17 @@ const WS_LOG_GROUP_KEYS = {
|
||||
AUDIO_SEND: "send:input.audio",
|
||||
};
|
||||
const CAMERA_DONE_TEXT = "【拍摄完成】";
|
||||
// Sample images shown as thumbnails under the camera preview. Same-origin files
|
||||
// so they can be drawn to a canvas (for base64 + dimensions) without tainting.
|
||||
const SAMPLE_IMAGES = [
|
||||
{ src: "./samples/front-damage.jpg", label: "车辆前部" },
|
||||
{ src: "./samples/plate.jpg", label: "车牌" },
|
||||
{ src: "./samples/license.jpg", label: "驾驶证" },
|
||||
{ src: "./samples/scene.jpg", label: "事故现场" },
|
||||
];
|
||||
// Cap the longer edge before JPEG-encoding so payloads stay small.
|
||||
const IMAGE_MAX_DIM = 1280;
|
||||
const IMAGE_JPEG_QUALITY = 0.85;
|
||||
const CAMERA_STATE_PROMPTS = {
|
||||
2000: "请对准车辆碰撞部位拍摄照片。",
|
||||
2001: "请对准车辆碰撞部位拍摄照片。",
|
||||
@@ -62,6 +73,14 @@ const els = {
|
||||
cameraState: document.getElementById("camera-state"),
|
||||
cameraQuestion: document.getElementById("camera-question"),
|
||||
cameraDoneBtn: document.getElementById("camera-done-btn"),
|
||||
cameraPreview: document.getElementById("camera-preview"),
|
||||
cameraVideo: document.getElementById("camera-video"),
|
||||
cameraPhoto: document.getElementById("camera-photo"),
|
||||
cameraCanvas: document.getElementById("camera-canvas"),
|
||||
cameraStartBtn: document.getElementById("camera-start-btn"),
|
||||
cameraFlipBtn: document.getElementById("camera-flip-btn"),
|
||||
cameraUpload: document.getElementById("camera-upload"),
|
||||
cameraSamples: document.getElementById("camera-samples"),
|
||||
clearBtn: document.getElementById("clear-btn"),
|
||||
clearWsLogBtn: document.getElementById("clear-ws-log-btn"),
|
||||
wsLog: document.getElementById("ws-log"),
|
||||
@@ -125,6 +144,13 @@ const state = {
|
||||
assistantState: "",
|
||||
cameraState: "",
|
||||
|
||||
// Camera / image input.
|
||||
cameraStream: null,
|
||||
cameraActive: false,
|
||||
cameraFacing: "environment",
|
||||
pendingImage: null,
|
||||
samplesRendered: false,
|
||||
|
||||
// VU meter smoothing.
|
||||
meterLevel: 0,
|
||||
|
||||
@@ -211,14 +237,16 @@ function setAssistantState(value) {
|
||||
|
||||
// Enable the "done" button only when all three hold: the WebSocket is
// connected and open, a camera step is active, and there is an image to send
// (a live camera or an already-selected image).
function setCameraButtonEnabled() {
  if (!els.cameraDoneBtn) return;
  const wsReady =
    state.connected && state.ws && state.ws.readyState === WebSocket.OPEN;
  const hasImageSource = state.cameraActive || Boolean(state.pendingImage);
  els.cameraDoneBtn.disabled = !wsReady || !state.cameraState || !hasImageSource;
}
|
||||
|
||||
function syncCameraDrawer(value) {
|
||||
const prompt = CAMERA_STATE_PROMPTS[value];
|
||||
const open = Boolean(prompt);
|
||||
const wasOpen = Boolean(state.cameraState);
|
||||
state.cameraState = open ? value : "";
|
||||
els.cameraDrawer.classList.toggle("is-open", open);
|
||||
els.conversation.classList.toggle("has-camera", open);
|
||||
@@ -226,9 +254,11 @@ function syncCameraDrawer(value) {
|
||||
if (open) {
|
||||
els.cameraState.textContent = `State ${value}`;
|
||||
els.cameraQuestion.textContent = prompt;
|
||||
renderSampleThumbnails();
|
||||
} else {
|
||||
els.cameraState.textContent = "State -";
|
||||
els.cameraQuestion.textContent = "";
|
||||
if (wasOpen) resetCameraInput();
|
||||
}
|
||||
setCameraButtonEnabled();
|
||||
}
|
||||
@@ -260,6 +290,35 @@ function addBubble(role, text) {
|
||||
return bubble;
|
||||
}
|
||||
|
||||
// Append one chat bubble that shows an image with its (optional) caption text.
// Clears the chat log first if it still holds the ".chat__empty" element,
// adds a role tag for every role except "system", and returns the new bubble.
function addImageBubble(role, imageUrl, text) {
  const placeholder = els.chatLog.querySelector(".chat__empty");
  if (placeholder) {
    els.chatLog.innerHTML = "";
  }

  const wrapper = document.createElement("div");
  wrapper.className = `bubble bubble--${role}`;

  if (role !== "system") {
    const roleTag = document.createElement("span");
    roleTag.className = "bubble__role";
    roleTag.textContent = role === "user" ? "You" : "Assistant";
    wrapper.appendChild(roleTag);
  }

  const picture = document.createElement("img");
  picture.className = "bubble__image";
  picture.src = imageUrl;
  picture.alt = text || "image";
  wrapper.appendChild(picture);

  // The text span is always created, even when the caption is empty.
  const caption = document.createElement("span");
  caption.className = "bubble__text";
  caption.textContent = text || "";
  wrapper.appendChild(caption);

  els.chatLog.appendChild(wrapper);
  scrollChatToBottom();
  return wrapper;
}
|
||||
|
||||
function appendToBubble(bubble, text) {
|
||||
const body = bubble.querySelector(".bubble__text");
|
||||
body.textContent += text;
|
||||
@@ -499,6 +558,9 @@ function compactWsPayload(payload) {
|
||||
if (typeof compact.audio === "string") {
|
||||
compact.audio = `<base64 ${compact.audio.length} chars>`;
|
||||
}
|
||||
if (typeof compact.image === "string") {
|
||||
compact.image = `<base64 ${compact.image.length} chars>`;
|
||||
}
|
||||
if (typeof compact.data === "string" && compact.data.length > 160) {
|
||||
compact.data = `<string ${compact.data.length} chars>`;
|
||||
}
|
||||
@@ -807,6 +869,219 @@ function resetPlaybackClock() {
|
||||
}
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------ Camera / image */
|
||||
|
||||
// Switch the preview box between showing the live video ("camera"), the
// selected still ("photo"), or neither (any other value, e.g. "idle").
function setPreviewMode(mode) {
  const showCamera = mode === "camera";
  const showPhoto = mode === "photo";
  const previewClasses = els.cameraPreview.classList;
  previewClasses.toggle("is-camera", showCamera);
  previewClasses.toggle("is-photo", showPhoto);
}
|
||||
|
||||
// Draw an <img>/<video> source to the shared canvas and return a normalized
// payload ({ dataUrl, mime, width, height }) suitable for an `input.image`
// message. The longer edge is capped at IMAGE_MAX_DIM and the result is
// JPEG-encoded. Returns null when the source has no dimensions yet or when
// drawing/encoding fails.
function mediaToPayload(source) {
  const srcW = source.videoWidth || source.naturalWidth || source.width;
  const srcH = source.videoHeight || source.naturalHeight || source.height;
  if (!srcW || !srcH) return null;

  // Only ever scale down; clamp to 1px so an extreme aspect ratio cannot
  // round the short edge to 0 and produce an empty canvas.
  const scale = Math.min(1, IMAGE_MAX_DIM / Math.max(srcW, srcH));
  const w = Math.max(1, Math.round(srcW * scale));
  const h = Math.max(1, Math.round(srcH * scale));

  const canvas = els.cameraCanvas;
  canvas.width = w;
  canvas.height = h;
  const ctx = canvas.getContext("2d");
  if (!ctx) {
    // getContext() returns null when a 2D context cannot be created.
    addWsLog("system", "image encode failed: 2D canvas context unavailable");
    return null;
  }

  let dataUrl;
  try {
    // drawImage can throw too (e.g. a source in a broken state), so it is
    // inside the same guard as the encoding step.
    ctx.drawImage(source, 0, 0, w, h);
    dataUrl = canvas.toDataURL("image/jpeg", IMAGE_JPEG_QUALITY);
  } catch (err) {
    addWsLog("system", `image encode failed: ${err.message || err}`);
    return null;
  }
  return { dataUrl, mime: "image/jpeg", width: w, height: h };
}
|
||||
|
||||
// Store the image that will be sent on submit. A non-empty payload is also
// shown in the preview; the submit button state is refreshed either way.
function setPendingImage(payload) {
  state.pendingImage = payload;
  if (!payload) {
    setCameraButtonEnabled();
    return;
  }
  els.cameraPhoto.src = payload.dataUrl;
  setPreviewMode("photo");
  setCameraButtonEnabled();
}
|
||||
|
||||
// Id of the most recent startCamera() call. A call that finds a newer id after
// an await knows it has been superseded and must not touch shared state.
let cameraStartSeq = 0;

// Start (or restart) the live camera preview using state.cameraFacing.
// Any previous stream is stopped first. Errors are reported to the WS log and
// leave the camera off. A stream that arrives after the call was superseded,
// or after the camera drawer was closed, is stopped immediately so no camera
// track is left running unattended.
async function startCamera() {
  if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
    addWsLog("system", "getUserMedia not available in this browser");
    return;
  }
  stopCameraStream();
  const seq = ++cameraStartSeq;

  let stream;
  try {
    stream = await navigator.mediaDevices.getUserMedia({
      video: { facingMode: state.cameraFacing },
      audio: false,
    });
  } catch (err) {
    addWsLog("system", `camera error: ${err.message || err}`);
    return;
  }

  // While the permission prompt was pending, a newer startCamera() may have
  // begun or the drawer may have closed (state.cameraState cleared).
  // Keeping this stream would leak live tracks.
  if (seq !== cameraStartSeq || !state.cameraState) {
    stream.getTracks().forEach((track) => track.stop());
    return;
  }

  state.cameraStream = stream;
  els.cameraVideo.srcObject = stream;
  try {
    await els.cameraVideo.play();
  } catch (_) {
    /* autoplay may resolve later */
  }
  // A newer call may have taken over while play() was pending; it has
  // already stopped this stream via stopCameraStream(), so just bow out.
  if (seq !== cameraStartSeq) return;

  state.cameraActive = true;
  state.pendingImage = null;
  setPreviewMode("camera");
  els.cameraStartBtn.classList.add("is-active");
  els.cameraStartBtn.textContent = "重新拍摄";
  els.cameraFlipBtn.hidden = false;
  clearSampleSelection();
  setCameraButtonEnabled();
}
|
||||
|
||||
// Stop every track of the current camera stream (if any), detach it from the
// video element, and put the camera buttons back into their "camera off" look.
function stopCameraStream() {
  const stream = state.cameraStream;
  if (stream) {
    for (const track of stream.getTracks()) {
      track.stop();
    }
    state.cameraStream = null;
  }
  els.cameraVideo.srcObject = null;
  state.cameraActive = false;

  const startBtn = els.cameraStartBtn;
  startBtn.classList.remove("is-active");
  startBtn.textContent = "使用摄像头";
  els.cameraFlipBtn.hidden = true;
}
|
||||
|
||||
// Grab the current video frame as an image payload. On success the camera is
// stopped and the frame becomes the pending image; returns the payload, or
// null when no frame could be captured (camera state is then left as-is).
function captureFromCamera() {
  const frame = mediaToPayload(els.cameraVideo);
  if (frame) {
    stopCameraStream();
    setPendingImage(frame);
    return frame;
  }
  return null;
}
|
||||
|
||||
// Load `src` into a fresh Image and return a promise that resolves with the
// element once it has loaded, or rejects with an Error naming the source.
function loadImage(src) {
  const img = new Image();
  // The executor runs synchronously, so both handlers are attached before
  // the source is assigned below.
  const loaded = new Promise((resolve, reject) => {
    img.onload = () => resolve(img);
    img.onerror = () => reject(new Error(`failed to load image: ${src}`));
  });
  img.src = src;
  return loaded;
}
|
||||
|
||||
// Turn a user-picked file into the pending image. The file is read through a
// temporary object URL (always revoked afterwards); failures are written to
// the WS log. Does nothing when no file is given or no payload can be built.
async function selectFileImage(file) {
  if (!file) return;
  const objectUrl = URL.createObjectURL(file);
  try {
    const payload = mediaToPayload(await loadImage(objectUrl));
    if (payload) {
      stopCameraStream();
      clearSampleSelection();
      setPendingImage(payload);
    }
  } catch (err) {
    addWsLog("system", `upload error: ${err.message || err}`);
  } finally {
    URL.revokeObjectURL(objectUrl);
  }
}
|
||||
|
||||
// Load a sample image and make it the pending image, highlighting the
// thumbnail button that was clicked (when one is given). Failures are written
// to the WS log; nothing changes when no payload can be built.
async function selectSampleImage(src, buttonEl) {
  try {
    const payload = mediaToPayload(await loadImage(src));
    if (payload) {
      stopCameraStream();
      clearSampleSelection();
      if (buttonEl) {
        buttonEl.classList.add("is-selected");
      }
      setPendingImage(payload);
    }
  } catch (err) {
    addWsLog("system", `sample error: ${err.message || err}`);
  }
}
|
||||
|
||||
// Remove the "is-selected" highlight from every sample thumbnail.
function clearSampleSelection() {
  const selected = els.cameraSamples.querySelectorAll(
    ".camera-drawer__sample.is-selected"
  );
  selected.forEach((thumb) => {
    thumb.classList.remove("is-selected");
  });
}
|
||||
|
||||
// Build the sample thumbnail buttons once. Later calls are no-ops thanks to
// the state.samplesRendered flag. Each button selects its sample when clicked.
function renderSampleThumbnails() {
  if (state.samplesRendered) return;
  state.samplesRendered = true;

  const container = els.cameraSamples;
  container.innerHTML = "";
  SAMPLE_IMAGES.forEach(({ src, label }) => {
    const thumbBtn = document.createElement("button");
    thumbBtn.type = "button";
    thumbBtn.className = "camera-drawer__sample";
    thumbBtn.title = label;

    const thumbImg = document.createElement("img");
    thumbImg.src = src;
    thumbImg.alt = label;
    thumbBtn.appendChild(thumbImg);

    thumbBtn.addEventListener("click", () => selectSampleImage(src, thumbBtn));
    container.appendChild(thumbBtn);
  });
}
|
||||
|
||||
// Return the camera drawer to its idle state: camera off, no pending image,
// no highlighted sample, empty photo preview, and the submit button refreshed.
function resetCameraInput() {
  stopCameraStream();
  state.pendingImage = null;
  clearSampleSelection();

  // Drop the old still so it cannot reappear next time the preview is shown.
  els.cameraPhoto.removeAttribute("src");
  setPreviewMode("idle");

  // Must run last: it reads the state cleared above.
  setCameraButtonEnabled();
}
|
||||
|
||||
// Send an image payload over the WebSocket as an `input.image` message, with
// `text` (or the camera-done marker when empty) as the accompanying caption.
// Returns false without sending when there is no payload or the socket is not
// open; returns true once the message is sent and the local bubble is added.
function sendImage(payload, text) {
  const socketOpen = state.ws && state.ws.readyState === WebSocket.OPEN;
  if (!payload || !socketOpen) return false;

  const caption = text || CAMERA_DONE_TEXT;
  const { dataUrl, mime, width, height } = payload;
  wsSend(
    JSON.stringify({
      type: "input.image",
      image: dataUrl,
      mime_type: mime,
      width,
      height,
      text: caption,
      interrupt: true,
    })
  );

  // Same as the text-input path: cut off any bot audio still playing and show
  // the user's image and caption as one local bubble (the engine does not echo
  // image input back as a transcript event).
  stopPlaybackQueue();
  state.currentAssistantBubble = null;
  addImageBubble("user", dataUrl, caption);
  return true;
}
|
||||
|
||||
// Submit the image for the current camera step. With the live camera on, the
// current frame is captured first; if that yields nothing, or the camera is
// off, the previously selected (uploaded / sample / captured) image is used.
// The drawer input is reset only after a successful send.
function submitCameraImage() {
  const previouslySelected = state.pendingImage;
  const payload =
    (state.cameraActive && captureFromCamera()) || previouslySelected;
  if (!payload) return;

  // The accompanying text stays the "【拍摄完成】" marker that advances the
  // FastGPT camera step; the image rides along as the multimodal attachment.
  const sent = sendImage(payload, CAMERA_DONE_TEXT);
  if (sent) {
    resetCameraInput();
  }
}
|
||||
|
||||
/* --------------------------------------------------------- Chat updates */
|
||||
|
||||
function handleUserTranscript(text) {
|
||||
@@ -1139,7 +1414,23 @@ els.clearWsLogBtn.addEventListener("click", () => {
|
||||
|
||||
// "Done" button: submit the captured/selected image for the active camera
// step. The image message already carries the camera-done marker text, so no
// separate text message is sent (sending both would deliver the marker twice).
els.cameraDoneBtn.addEventListener("click", () => {
  if (!state.cameraState) return;
  submitCameraImage();
});

// "Use camera" / "retake" button: (re)start the live preview.
els.cameraStartBtn.addEventListener("click", () => {
  startCamera();
});

// Flip button: toggle between rear ("environment") and front ("user") camera,
// restarting the stream only when the camera is currently running.
els.cameraFlipBtn.addEventListener("click", () => {
  state.cameraFacing =
    state.cameraFacing === "environment" ? "user" : "environment";
  if (state.cameraActive) startCamera();
});

// File picker: use the first chosen file, then clear the input so picking the
// same file again still fires a "change" event.
els.cameraUpload.addEventListener("change", (event) => {
  const file = event.target.files && event.target.files[0];
  selectFileImage(file);
  event.target.value = "";
});
|
||||
|
||||
function autosizeTextarea() {
|
||||
@@ -1174,6 +1465,7 @@ els.textInput.addEventListener("keydown", (event) => {
|
||||
});
|
||||
|
||||
window.addEventListener("beforeunload", () => {
|
||||
stopCameraStream();
|
||||
if (state.ws) {
|
||||
try {
|
||||
state.ws.close();
|
||||
|
||||
@@ -82,16 +82,65 @@
|
||||
<span id="camera-state" class="camera-drawer__state">State -</span>
|
||||
</div>
|
||||
|
||||
<div class="camera-drawer__preview" aria-hidden="true">
|
||||
<div id="camera-preview" class="camera-drawer__preview">
|
||||
<video
|
||||
id="camera-video"
|
||||
class="camera-drawer__video"
|
||||
playsinline
|
||||
muted
|
||||
autoplay
|
||||
></video>
|
||||
<img
|
||||
id="camera-photo"
|
||||
class="camera-drawer__photo"
|
||||
alt="Selected image preview"
|
||||
/>
|
||||
<span class="camera-drawer__corner camera-drawer__corner--tl"></span>
|
||||
<span class="camera-drawer__corner camera-drawer__corner--tr"></span>
|
||||
<span class="camera-drawer__corner camera-drawer__corner--bl"></span>
|
||||
<span class="camera-drawer__corner camera-drawer__corner--br"></span>
|
||||
<span class="camera-drawer__lens"></span>
|
||||
<span class="camera-drawer__scan"></span>
|
||||
<span id="camera-placeholder" class="camera-drawer__placeholder">
|
||||
打开摄像头实时拍摄,或从下方选择 / 上传图片
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<p id="camera-question" class="camera-drawer__question"></p>
|
||||
|
||||
<div class="camera-drawer__sources">
|
||||
<button
|
||||
id="camera-start-btn"
|
||||
class="btn btn--ghost camera-drawer__source"
|
||||
type="button"
|
||||
>
|
||||
使用摄像头
|
||||
</button>
|
||||
<button
|
||||
id="camera-flip-btn"
|
||||
class="btn btn--ghost camera-drawer__source"
|
||||
type="button"
|
||||
hidden
|
||||
>
|
||||
切换摄像头
|
||||
</button>
|
||||
<label class="btn btn--ghost camera-drawer__source">
|
||||
上传图片
|
||||
<input
|
||||
id="camera-upload"
|
||||
type="file"
|
||||
accept="image/*"
|
||||
hidden
|
||||
/>
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<div
|
||||
id="camera-samples"
|
||||
class="camera-drawer__samples"
|
||||
aria-label="示例图片,点击选择"
|
||||
></div>
|
||||
|
||||
<button
|
||||
id="camera-done-btn"
|
||||
class="btn btn--primary camera-drawer__button"
|
||||
@@ -100,6 +149,7 @@
|
||||
>
|
||||
拍摄完成
|
||||
</button>
|
||||
<canvas id="camera-canvas" hidden></canvas>
|
||||
</div>
|
||||
</aside>
|
||||
|
||||
|
||||
BIN
examples/webpage/samples/front-damage.jpg
Normal file
BIN
examples/webpage/samples/front-damage.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 18 KiB |
BIN
examples/webpage/samples/license.jpg
Normal file
BIN
examples/webpage/samples/license.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 17 KiB |
BIN
examples/webpage/samples/plate.jpg
Normal file
BIN
examples/webpage/samples/plate.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 17 KiB |
BIN
examples/webpage/samples/scene.jpg
Normal file
BIN
examples/webpage/samples/scene.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 17 KiB |
@@ -148,6 +148,45 @@ body {
|
||||
background-size: 34px 34px, 34px 34px, auto, auto;
|
||||
}
|
||||
|
||||
.camera-drawer__video,
|
||||
.camera-drawer__photo {
|
||||
position: absolute;
|
||||
inset: 0;
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
object-fit: cover;
|
||||
display: none;
|
||||
z-index: 1;
|
||||
}
|
||||
|
||||
.camera-drawer__preview.is-camera .camera-drawer__video {
|
||||
display: block;
|
||||
}
|
||||
|
||||
.camera-drawer__preview.is-photo .camera-drawer__photo {
|
||||
display: block;
|
||||
}
|
||||
|
||||
/* Hide the decorative lens/scan/placeholder once real media is showing. */
|
||||
.camera-drawer__preview.is-camera .camera-drawer__lens,
|
||||
.camera-drawer__preview.is-photo .camera-drawer__lens,
|
||||
.camera-drawer__preview.is-camera .camera-drawer__scan,
|
||||
.camera-drawer__preview.is-photo .camera-drawer__scan,
|
||||
.camera-drawer__preview.is-camera .camera-drawer__placeholder,
|
||||
.camera-drawer__preview.is-photo .camera-drawer__placeholder {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.camera-drawer__placeholder {
|
||||
position: absolute;
|
||||
inset: auto 18px 16px;
|
||||
z-index: 2;
|
||||
color: rgba(214, 220, 235, 0.78);
|
||||
font-size: 12px;
|
||||
line-height: 1.5;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.camera-drawer__lens {
|
||||
position: absolute;
|
||||
top: 50%;
|
||||
@@ -174,6 +213,7 @@ body {
|
||||
|
||||
.camera-drawer__corner {
|
||||
position: absolute;
|
||||
z-index: 2;
|
||||
width: 28px;
|
||||
height: 28px;
|
||||
border-color: rgba(255, 255, 255, 0.7);
|
||||
@@ -229,6 +269,62 @@ body {
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
.camera-drawer__sources {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 8px;
|
||||
}
|
||||
|
||||
.camera-drawer__source {
|
||||
flex: 1 1 auto;
|
||||
min-height: 38px;
|
||||
font-size: 13px;
|
||||
font-weight: 600;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.camera-drawer__source.is-active {
|
||||
border-color: var(--success);
|
||||
color: var(--success);
|
||||
}
|
||||
|
||||
.camera-drawer__samples {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(4, 1fr);
|
||||
gap: 8px;
|
||||
}
|
||||
|
||||
.camera-drawer__samples:empty {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.camera-drawer__sample {
|
||||
position: relative;
|
||||
aspect-ratio: 4 / 3;
|
||||
padding: 0;
|
||||
border: 2px solid transparent;
|
||||
border-radius: 10px;
|
||||
overflow: hidden;
|
||||
cursor: pointer;
|
||||
background: #0f141f;
|
||||
}
|
||||
|
||||
.camera-drawer__sample img {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
object-fit: cover;
|
||||
display: block;
|
||||
}
|
||||
|
||||
.camera-drawer__sample:hover {
|
||||
border-color: rgba(149, 160, 187, 0.6);
|
||||
}
|
||||
|
||||
.camera-drawer__sample.is-selected {
|
||||
border-color: var(--success);
|
||||
box-shadow: 0 0 0 1px var(--success);
|
||||
}
|
||||
|
||||
.app__body {
|
||||
display: grid;
|
||||
grid-template-columns: minmax(0, 1fr) clamp(300px, 32vw, 420px);
|
||||
@@ -511,6 +607,18 @@ body {
|
||||
margin-bottom: 4px;
|
||||
}
|
||||
|
||||
.bubble__image {
|
||||
display: block;
|
||||
max-width: 240px;
|
||||
width: 100%;
|
||||
border-radius: 10px;
|
||||
margin-bottom: 6px;
|
||||
}
|
||||
|
||||
.bubble__image + .bubble__text:empty {
|
||||
display: none;
|
||||
}
|
||||
|
||||
/* WebSocket log --------------------------------------------------------- */
|
||||
|
||||
.ws-log {
|
||||
|
||||
Reference in New Issue
Block a user