Sync with engine v5

This commit is contained in:
Xin Wang
2026-06-03 12:36:18 +08:00
parent 056a8a4ad8
commit 705a63dd25
17 changed files with 854 additions and 111 deletions

View File

@@ -131,6 +131,7 @@ class LLMConfig:
variables: dict[str, str] = field(default_factory=dict)
detail: bool = False
timeout_sec: float = 60.0
image_input_mode: str = "base64"
@property
def is_fastgpt(self) -> bool:
@@ -236,6 +237,15 @@ def config_from_dict(data: dict) -> EngineConfig:
if llm.get("chat_id") == "":
llm["chat_id"] = None
llm.pop("send_system_prompt", None)
image_input_mode = str(
llm.get("image_input_mode", LLMConfig().image_input_mode)
).strip().lower()
if image_input_mode not in {"base64", "upload"}:
raise ValueError(
"services.llm.image_input_mode must be 'base64' or 'upload', "
f"got {llm.get('image_input_mode')!r}"
)
llm["image_input_mode"] = image_input_mode
if llm.get("app_id") == "":
llm["app_id"] = None
if not isinstance(llm.get("variables"), dict):

View File

@@ -1,5 +1,10 @@
from __future__ import annotations
import asyncio
import base64
import binascii
import os
import tempfile
import uuid
from dataclasses import dataclass, field
from typing import Any
@@ -19,6 +24,7 @@ from pipecat.frames.frames import (
LLMFullResponseStartFrame,
LLMTextFrame,
OutputTransportMessageFrame,
OutputTransportMessageUrgentFrame,
)
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.frame_processor import FrameDirection
@@ -129,6 +135,50 @@ def _interactive_spoken_prompt(event: FastGPTInteractiveEvent) -> str:
return "请继续。"
# Accepted values for the service's ``image_input_mode`` argument.
IMAGE_INPUT_MODE_BASE64 = "base64"
IMAGE_INPUT_MODE_UPLOAD = "upload"
SUPPORTED_IMAGE_INPUT_MODES = frozenset({IMAGE_INPUT_MODE_BASE64, IMAGE_INPUT_MODE_UPLOAD})
# File suffix chosen for the temporary upload file, keyed by image MIME type.
_MIME_TO_EXT = {
"image/jpeg": ".jpg",
"image/png": ".png",
"image/webp": ".webp",
}
def _message_has_image(message: dict[str, Any]) -> bool:
    """Return True when the message's content list holds an ``image_url`` part.

    Messages whose ``content`` is not a list (plain text, missing) never
    count as carrying an image.
    """
    parts = message.get("content")
    if not isinstance(parts, list):
        return False
    for item in parts:
        if isinstance(item, dict) and item.get("type") == "image_url":
            return True
    return False
def _redact_messages_for_log(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return a log-safe copy of *messages* with image URLs replaced by a placeholder.

    Every ``image_url`` content part has its URL (typically a large base64
    data URL) replaced with ``"<N chars>"``. Both the dict form
    (``{"image_url": {"url": ...}}``) and the bare-string form
    (``{"image_url": "..."}``) are redacted so a payload can never reach the
    log just because it used the less common shape. Messages whose content is
    not a list are returned as the same object; the input is never mutated.
    """
    redacted: list[dict[str, Any]] = []
    for message in messages:
        content = message.get("content")
        if not isinstance(content, list):
            redacted.append(message)
            continue
        parts: list[Any] = []
        for part in content:
            if not (isinstance(part, dict) and part.get("type") == "image_url"):
                parts.append(part)
                continue
            image_url = part.get("image_url")
            if isinstance(image_url, dict):
                url = str(image_url.get("url") or "")
                parts.append({"type": "image_url", "image_url": {"url": f"<{len(url)} chars>"}})
            elif isinstance(image_url, str):
                # Bare-string variant: redact it too instead of failing open.
                parts.append({"type": "image_url", "image_url": f"<{len(image_url)} chars>"})
            else:
                # Nothing URL-like to hide (missing / None / unexpected type).
                parts.append(part)
        redacted.append({**message, "content": parts})
    return redacted
@dataclass
class FastGPTLLMSettings(LLMSettings):
variables: dict[str, Any] = field(default_factory=dict)
@@ -167,6 +217,7 @@ class FastGPTLLMService(LLMService):
app_id: str | None = None,
greeting_prompt: str | None = None,
timeout: float = 60.0,
image_input_mode: str = IMAGE_INPUT_MODE_BASE64,
settings: FastGPTLLMSettings | None = None,
**kwargs,
) -> None:
@@ -185,6 +236,20 @@ class FastGPTLLMService(LLMService):
)
self._active_response = None
mode = (image_input_mode or IMAGE_INPUT_MODE_BASE64).strip().lower()
if mode not in SUPPORTED_IMAGE_INPUT_MODES:
raise ValueError(
f"Unsupported image_input_mode {image_input_mode!r}; "
f"expected one of {sorted(SUPPORTED_IMAGE_INPUT_MODES)}"
)
if mode == IMAGE_INPUT_MODE_UPLOAD and not self._app_id:
logger.warning(
"FastGPT image_input_mode='upload' requires app_id; "
"falling back to inline base64"
)
mode = IMAGE_INPUT_MODE_BASE64
self._image_input_mode = mode
@property
def app_id(self) -> str:
return self._app_id
@@ -305,26 +370,114 @@ class FastGPTLLMService(LLMService):
if response is not None:
await response.aclose()
def _build_fastgpt_messages(self, context: LLMContext) -> list[dict[str, str]]:
def _build_fastgpt_messages(self, context: LLMContext) -> list[dict[str, Any]]:
    """Build the single-message payload sent to FastGPT for this turn.

    Walks the context from newest to oldest and uses the most recent user
    message that has either an image part or non-empty text. When no such
    message exists, the configured greeting prompt is sent instead.
    """
    for candidate in reversed(context.get_messages()):
        is_user_message = isinstance(candidate, dict) and candidate.get("role") == "user"
        if not is_user_message:
            continue
        if _message_has_image(candidate):
            # Multimodal turn: pass the OpenAI-style content list through
            # unchanged (text parts + image_url with a base64 data URL);
            # FastGPT's /chat/completions accepts this directly.
            return [{"role": "user", "content": candidate["content"]}]
        spoken = _message_text(candidate)
        if spoken:
            return [{"role": "user", "content": spoken}]
    return [{"role": "user", "content": self._greeting_prompt}]
async def _resolve_image_inputs(
    self, messages: list[dict[str, Any]]
) -> list[dict[str, Any]]:
    """In ``upload`` mode, replace inline base64 image data URLs with uploaded URLs.

    In ``base64`` mode the messages are returned untouched (inline data URLs).
    New message/content objects are built so the shared ``LLMContext`` messages
    are never mutated.

    Args:
        messages: Messages about to be sent to FastGPT.

    Returns:
        The same list in ``base64`` mode; otherwise a new list in which every
        ``image_url`` part holding a ``data:image/`` URL points at whatever
        ``_upload_data_url`` returned. Other keys on the part and on its
        ``image_url`` dict (for example ``detail``) are preserved.
    """
    if self._image_input_mode != IMAGE_INPUT_MODE_UPLOAD:
        return messages

    resolved: list[dict[str, Any]] = []
    for message in messages:
        content = message.get("content")
        if not isinstance(content, list):
            resolved.append(message)
            continue
        new_content: list[Any] = []
        for part in content:
            image_url = (
                part.get("image_url")
                if isinstance(part, dict) and part.get("type") == "image_url"
                else None
            )
            # Only the dict form carries a "url" key; a None/string value must
            # pass through untouched rather than raise AttributeError.
            url = image_url.get("url") if isinstance(image_url, dict) else None
            if isinstance(url, str) and url.startswith("data:image/"):
                uploaded = await self._upload_data_url(url)
                new_content.append(
                    {**part, "image_url": {**image_url, "url": uploaded}}
                )
            else:
                new_content.append(part)
        resolved.append({**message, "content": new_content})
    return resolved
async def _upload_data_url(self, data_url: str) -> str:
    """Upload a ``data:image/...;base64,...`` URL via FastGPT and return its URL.

    The payload is decoded, written to a temporary file and handed to
    ``self._client.upload_chat_image``. Falls back to the original data URL if
    parsing or upload fails (invalid or empty base64, upload error, response
    without a ``url``) so the turn still proceeds with inline base64.

    Args:
        data_url: Inline image data URL to upload.

    Returns:
        The uploaded image URL, or ``data_url`` unchanged on any failure.
    """
    header, _, payload = data_url.partition(",")
    # MIME types are case-insensitive; normalise before the suffix lookup.
    mime_type = header[len("data:"):].split(";", 1)[0].strip().lower() or "image/jpeg"
    try:
        raw = base64.b64decode(payload, validate=True)
    except (binascii.Error, ValueError) as exc:
        logger.warning(f"FastGPT image upload skipped; invalid base64: {exc}")
        return data_url
    if not raw:
        # Nothing to upload; do not create a zero-byte file on the server.
        logger.warning("FastGPT image upload skipped; empty image payload")
        return data_url

    suffix = _MIME_TO_EXT.get(mime_type, ".jpg")
    tmp_path: str | None = None
    try:
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
            # Record the path before writing so the ``finally`` block removes
            # the file even when the write itself fails (e.g. disk full).
            tmp_path = tmp.name
            tmp.write(raw)
        result = await self._client.upload_chat_image(
            appId=self._app_id,
            chatId=self._chat_id,
            file_path=tmp_path,
        )
        url = result.get("url") if isinstance(result, dict) else None
        if isinstance(url, str) and url:
            logger.info(
                f"FastGPT image uploaded chatId={self._chat_id} "
                f"bytes={len(raw)} url={url}"
            )
            return url
        logger.warning("FastGPT image upload returned no url; using inline base64")
        return data_url
    except Exception as exc:
        # Deliberate best-effort boundary: any failure degrades to inline base64.
        logger.warning(f"FastGPT image upload failed; using inline base64: {exc}")
        return data_url
    finally:
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError as exc:
                logger.debug(f"FastGPT temp image cleanup failed path={tmp_path}: {exc}")
async def _process_context(self, context: LLMContext) -> None:
messages = self._build_fastgpt_messages(context)
messages = await self._resolve_image_inputs(messages)
variables = self._settings.variables or None
logger.info(
"FastGPT chat completion "
f"chatId={self._chat_id} appId={self._app_id or '-'} "
f"variables={sorted((variables or {}).keys())} messages={messages!r}"
f"variables={sorted((variables or {}).keys())} "
f"messages={_redact_messages_for_log(messages)!r}"
)
await self.start_ttfb_metrics()

View File

@@ -23,6 +23,7 @@ from pipecat.processors.aggregators.llm_response_universal import (
UserTurnStoppedMessage,
)
from pipecat.serializers.base_serializer import FrameSerializer
from pipecat.serializers.protobuf import ProtobufFrameSerializer
from pipecat.transports.websocket.fastapi import (
FastAPIWebsocketParams,
FastAPIWebsocketTransport,
@@ -68,6 +69,15 @@ async def run_product_voice_pipeline(websocket, config: EngineConfig) -> None:
)
async def run_voice_pipeline(websocket, config: EngineConfig) -> None:
    """Run the pipeline over *websocket* using the Pipecat protobuf serializer."""
    protobuf_serializer = ProtobufFrameSerializer()
    await run_pipeline_with_serializer(
        websocket,
        config,
        serializer=protobuf_serializer,
        client_label="Pipecat protobuf",
    )
async def run_pipeline_with_serializer(
websocket,
config: EngineConfig,
@@ -120,8 +130,13 @@ async def run_pipeline_with_serializer(
stop_secs=config.turn.vad.stop_secs,
min_volume=config.turn.vad.min_volume,
)
# Use a simple silence-timeout strategy for streaming ASR so short Chinese
# pauses do not split one logical utterance into multiple LLM calls.
# Replace pipecat's default stop strategy (Smart Turn v3) with a simple
# silence-timeout strategy. Smart Turn v3 was finalizing every short
# Chinese phrase as a complete turn, which caused one logical utterance
# to become several LLM calls and several user bubbles in the UI. The
# timeout strategy waits for `user_speech_timeout_sec` of silence
# (re-armed every time the user resumes speaking) before declaring the
# turn finished — which is what we actually want for streaming ASRs.
user_turn_strategies = UserTurnStrategies(
start=[
InterruptionGateUserTurnStartStrategy(
@@ -225,22 +240,6 @@ async def run_pipeline_with_serializer(
nonlocal idle_prompt_count
idle_prompt_count = 0
@user_aggregator.event_handler("on_user_turn_idle")
async def on_user_turn_idle(aggregator):
nonlocal idle_prompt_count
text = config.turn.idle_prompt_text.strip()
if not text or config.turn.idle_prompt_max_count <= 0:
return
if idle_prompt_count >= config.turn.idle_prompt_max_count:
return
idle_prompt_count += 1
logger.info(
"User idle prompt triggered "
f"count={idle_prompt_count}/{config.turn.idle_prompt_max_count}"
)
await aggregator.push_frame(TTSSpeakFrame(text))
@user_aggregator.event_handler("on_user_turn_stopped")
async def on_user_turn_stopped(_aggregator, _strategy, message: UserTurnStoppedMessage):
logger.info(f"User: {message.content}")
@@ -268,5 +267,25 @@ async def run_pipeline_with_serializer(
)
text_stream.take_interrupted_stream_text()
@user_aggregator.event_handler("on_user_turn_idle")
async def on_user_turn_idle(aggregator):
nonlocal idle_prompt_count
text = config.turn.idle_prompt_text.strip()
if not text or config.turn.idle_prompt_max_count <= 0:
return
if idle_prompt_count >= config.turn.idle_prompt_max_count:
return
idle_prompt_count += 1
logger.info(
"User idle prompt triggered "
f"count={idle_prompt_count}/{config.turn.idle_prompt_max_count}"
)
await aggregator.push_frame(TTSSpeakFrame(text))
# NOTE: assistant turn started/final events are emitted by
# ProductTextStreamProcessor, upstream of TTS, so text streams to the
# client ahead of audio. This logger is kept for server-side visibility.
runner = PipelineRunner(handle_sigint=False)
await runner.run(task)

View File

@@ -65,6 +65,7 @@ def create_llm_service(
app_id=config.app_id,
greeting_prompt=greeting_prompt,
timeout=config.timeout_sec,
image_input_mode=config.image_input_mode,
settings=FastGPTLLMSettings(
model=config.model or "fastgpt",
variables=variables,

View File

@@ -6,6 +6,7 @@ from pipecat.frames.frames import (
Frame,
InputTransportMessageFrame,
LLMMessagesAppendFrame,
UserImageRawFrame,
UserStartedSpeakingFrame,
UserStoppedSpeakingFrame,
)
@@ -13,11 +14,17 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
class ProductTextInputProcessor(FrameProcessor):
"""Converts product text-input transport messages into LLM turns."""
"""Converts product text-input transport messages and marks image input as user activity."""
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)
if isinstance(frame, UserImageRawFrame):
await self.broadcast_frame(UserStartedSpeakingFrame)
await self.push_frame(frame, direction)
await self.broadcast_frame(UserStoppedSpeakingFrame)
return
if not isinstance(frame, InputTransportMessageFrame):
await self.push_frame(frame, direction)
return

View File

@@ -154,6 +154,8 @@ class ProductTextStreamProcessor(FrameProcessor):
await self.push_frame(frame, direction)
await self._handle_interrupt()
elif isinstance(frame, TTSSpeakFrame):
# Fixed-text / direct-speech path: there's no LLM cycle, so
# synthesize one started/delta/final sequence for the spoken text.
text = frame.text or ""
await self.push_frame(frame, direction)
await self._start_turn()
@@ -172,6 +174,8 @@ class ProductTextStreamProcessor(FrameProcessor):
async def _delta(self, text: str) -> None:
if not self._turn_active:
# A text frame outside a turn shouldn't happen, but if it does,
# synthesize a started boundary so the client renders sensibly.
await self._start_turn()
self._aggregation.append(text)
await self._emit("response.text.delta", text=text)

View File

@@ -18,7 +18,12 @@ _COUNTABLE_TEXT_RE = re.compile(r"[\w\u4e00-\u9fff]", re.UNICODE)
class InterruptionGateUserTurnStartStrategy(BaseUserTurnStartStrategy):
"""Starts user turns only after likely intentional speech."""
"""Starts user turns only after likely intentional speech.
When the assistant is speaking, short background speech should not barge in
unless it is a common answer to a yes/no style question. When the assistant
is not speaking, any non-empty transcript can start a normal user turn.
"""
def __init__(
self,

View File

@@ -24,6 +24,19 @@ const WS_LOG_GROUP_KEYS = {
AUDIO_SEND: "send:input.audio",
};
// Marker text sent with a submitted image when no other text is supplied.
const CAMERA_DONE_TEXT = "【拍摄完成】";
// Sample images shown as thumbnails under the camera preview. Same-origin files
// so they can be drawn to a canvas (for base64 + dimensions) without tainting.
// Each entry: `src` is the image path, `label` is used as thumbnail title/alt.
const SAMPLE_IMAGES = [
{ src: "./samples/damage1.png", label: "车辆前部" },
{ src: "./samples/damage2.png", label: "车辆后部" },
{ src: "./samples/plate1.jpg", label: "车牌 1" },
{ src: "./samples/plate2.jpg", label: "车牌 2" },
{ src: "./samples/user1.jpg", label: "人物 1" },
{ src: "./samples/user2.jpg", label: "人物 2" },
];
// Cap the longer edge before JPEG-encoding so payloads stay small.
const IMAGE_MAX_DIM = 1280;
// Quality argument passed to canvas.toDataURL("image/jpeg", …).
const IMAGE_JPEG_QUALITY = 0.85;
const CAMERA_STATE_PROMPTS = {
2000: "请对准车辆碰撞部位拍摄照片。",
2001: "请对准车辆碰撞部位拍摄照片。",
@@ -62,6 +75,15 @@ const els = {
cameraState: document.getElementById("camera-state"),
cameraQuestion: document.getElementById("camera-question"),
cameraDoneBtn: document.getElementById("camera-done-btn"),
cameraPreview: document.getElementById("camera-preview"),
cameraVideo: document.getElementById("camera-video"),
cameraPhoto: document.getElementById("camera-photo"),
cameraCanvas: document.getElementById("camera-canvas"),
cameraStartBtn: document.getElementById("camera-start-btn"),
cameraDeviceRow: document.getElementById("camera-device-row"),
cameraDeviceSelect: document.getElementById("camera-device-select"),
cameraUpload: document.getElementById("camera-upload"),
cameraSamples: document.getElementById("camera-samples"),
clearBtn: document.getElementById("clear-btn"),
clearWsLogBtn: document.getElementById("clear-ws-log-btn"),
wsLog: document.getElementById("ws-log"),
@@ -125,6 +147,14 @@ const state = {
assistantState: "",
cameraState: "",
// Camera / image input.
cameraStream: null,
cameraActive: false,
cameraFacing: "environment",
videoDevices: [],
pendingImage: null,
samplesRendered: false,
// VU meter smoothing.
meterLevel: 0,
@@ -143,15 +173,15 @@ function setConnectButton() {
els.chatId.disabled = state.connected || state.connecting;
els.copyChatIdBtn.disabled = !state.connected || !state.chatId;
if (state.connecting) {
els.connectBtn.textContent = "Connecting…";
els.connectBtn.textContent = "连接中…";
els.connectBtn.disabled = true;
els.connectBtn.classList.remove("is-disconnect");
} else if (state.connected) {
els.connectBtn.textContent = "Disconnect";
els.connectBtn.textContent = "断开连接";
els.connectBtn.disabled = false;
els.connectBtn.classList.add("is-disconnect");
} else {
els.connectBtn.textContent = "Connect";
els.connectBtn.textContent = "连接";
els.connectBtn.disabled = false;
els.connectBtn.classList.remove("is-disconnect");
}
@@ -180,8 +210,8 @@ async function copyChatId() {
function setMicButton() {
els.micBtn.disabled = !state.connected;
els.micBtn.setAttribute("aria-pressed", state.micEnabled ? "true" : "false");
els.micBtn.title = state.micEnabled ? "Mute mic" : "Unmute mic";
els.micLabel.textContent = state.micEnabled ? "Mute mic" : "Enable mic";
els.micBtn.title = state.micEnabled ? "关闭麦克风" : "开启麦克风";
els.micLabel.textContent = state.micEnabled ? "关闭麦克风" : "开启麦克风";
els.micIndicator.classList.toggle("is-active", state.micEnabled);
}
@@ -204,41 +234,40 @@ function setAssistantState(value) {
const label = text.length > 32 ? `${text.slice(0, 31)}` : text;
state.assistantState = text;
els.stateIndicator.classList.toggle("is-active", Boolean(text));
els.stateLabel.textContent = label ? `State ${label}` : "State -";
els.stateIndicator.title = label ? `Assistant state: ${text}` : "Assistant state";
els.stateLabel.textContent = label ? `状态 ${label}` : "状态 -";
els.stateIndicator.title = label ? `助手状态:${text}` : "助手状态";
syncCameraDrawer(text);
}
function setCameraButtonEnabled() {
if (!els.cameraDoneBtn) return;
els.cameraDoneBtn.disabled =
!state.connected || !state.cameraState ||
!state.ws || state.ws.readyState !== WebSocket.OPEN;
const wsReady =
state.connected && state.ws && state.ws.readyState === WebSocket.OPEN;
const hasImageSource = state.cameraActive || Boolean(state.pendingImage);
els.cameraDoneBtn.disabled = !wsReady || !state.cameraState || !hasImageSource;
}
function syncCameraDrawer(value) {
const prompt = CAMERA_STATE_PROMPTS[value];
const open = Boolean(prompt);
const wasOpen = Boolean(state.cameraState);
state.cameraState = open ? value : "";
els.cameraDrawer.classList.toggle("is-open", open);
els.conversation.classList.toggle("has-camera", open);
els.cameraDrawer.setAttribute("aria-hidden", open ? "false" : "true");
if (open) {
els.cameraState.textContent = `State ${value}`;
els.cameraState.textContent = `状态 ${value}`;
els.cameraQuestion.textContent = prompt;
renderSampleThumbnails();
selectDefaultImage();
} else {
els.cameraState.textContent = "State -";
els.cameraState.textContent = "状态 -";
els.cameraQuestion.textContent = "";
if (wasOpen) resetCameraInput();
}
setCameraButtonEnabled();
}
function updateCameraQuestion(text) {
const value = typeof text === "string" ? text.trim() : "";
if (!state.cameraState || !value) return;
els.cameraQuestion.textContent = value;
}
function addBubble(role, text) {
if (els.chatLog.querySelector(".chat__empty")) {
els.chatLog.innerHTML = "";
@@ -248,7 +277,7 @@ function addBubble(role, text) {
if (role !== "system") {
const tag = document.createElement("span");
tag.className = "bubble__role";
tag.textContent = role === "user" ? "You" : "Assistant";
tag.textContent = role === "user" ? "" : "助手";
bubble.appendChild(tag);
}
const body = document.createElement("span");
@@ -260,6 +289,35 @@ function addBubble(role, text) {
return bubble;
}
// Append one chat bubble that shows an image together with optional text.
// Returns the created bubble element.
function addImageBubble(role, imageUrl, text) {
  // The first real bubble replaces the "empty chat" placeholder.
  const placeholder = els.chatLog.querySelector(".chat__empty");
  if (placeholder) {
    els.chatLog.innerHTML = "";
  }

  const wrapper = document.createElement("div");
  wrapper.className = `bubble bubble--${role}`;

  // System bubbles carry no speaker tag.
  if (role !== "system") {
    const speaker = document.createElement("span");
    speaker.className = "bubble__role";
    speaker.textContent = role === "user" ? "你" : "助手";
    wrapper.appendChild(speaker);
  }

  const picture = document.createElement("img");
  picture.className = "bubble__image";
  picture.src = imageUrl;
  picture.alt = text || "image";
  wrapper.appendChild(picture);

  const caption = document.createElement("span");
  caption.className = "bubble__text";
  caption.textContent = text || "";
  wrapper.appendChild(caption);

  els.chatLog.appendChild(wrapper);
  scrollChatToBottom();
  return wrapper;
}
function appendToBubble(bubble, text) {
const body = bubble.querySelector(".bubble__text");
body.textContent += text;
@@ -276,7 +334,7 @@ function clearChat() {
setAssistantState("");
const empty = document.createElement("div");
empty.className = "chat__empty";
empty.innerHTML = "<p>Chat cleared.</p>";
empty.innerHTML = "<p>对话已清空。</p>";
els.chatLog.appendChild(empty);
}
@@ -499,6 +557,9 @@ function compactWsPayload(payload) {
if (typeof compact.audio === "string") {
compact.audio = `<base64 ${compact.audio.length} chars>`;
}
if (typeof compact.image === "string") {
compact.image = `<base64 ${compact.image.length} chars>`;
}
if (typeof compact.data === "string" && compact.data.length > 160) {
compact.data = `<string ${compact.data.length} chars>`;
}
@@ -595,7 +656,7 @@ function wsSend(data) {
function clearWsLog() {
state.wsLogGroup = null;
els.wsLog.innerHTML =
'<div class="ws-log__empty">No websocket events yet.</div>';
'<div class="ws-log__empty">暂无 WebSocket 事件。</div>';
}
/* ---------------------------------------------------------------- Audio */
@@ -618,13 +679,13 @@ function renderMicDevices() {
const defaultOption = document.createElement("option");
defaultOption.value = "";
defaultOption.textContent = "Default microphone";
defaultOption.textContent = "默认麦克风";
els.micSelect.appendChild(defaultOption);
state.micDevices.forEach((device, index) => {
const option = document.createElement("option");
option.value = device.deviceId;
option.textContent = device.label || `Microphone ${index + 1}`;
option.textContent = device.label || `麦克风 ${index + 1}`;
els.micSelect.appendChild(option);
});
@@ -691,7 +752,7 @@ async function startMic() {
state.micSourceNode.connect(state.recorderNode);
state.micEnabled = true;
addWsLog("system", "mic capture started (binary input.audio frames)");
addWsLog("system", "麦克风已开启PCM 音频流)");
setMicButton();
}
@@ -727,7 +788,7 @@ function stopMic() {
state.micEnabled = false;
updateMeter(0);
if (wasEnabled) {
addWsLog("system", "mic capture stopped");
addWsLog("system", "麦克风已关闭");
}
setMicButton();
}
@@ -807,6 +868,272 @@ function resetPlaybackClock() {
}
}
/* ------------------------------------------------------ Camera / image */
function setPreviewMode(mode) {
  // mode: "camera" | "photo" | "idle" — "idle" clears both marker classes.
  const classes = els.cameraPreview.classList;
  const markers = [
    ["is-camera", "camera"],
    ["is-photo", "photo"],
  ];
  for (const [className, wanted] of markers) {
    classes.toggle(className, mode === wanted);
  }
}
// Draw an <img>/<video> source to the canvas and return a normalized payload
// (JPEG data URL + dimensions) suitable for an `input.image` message.
// Returns null when the source has no dimensions yet or encoding fails.
function mediaToPayload(source) {
  const srcW = source.videoWidth || source.naturalWidth || source.width;
  const srcH = source.videoHeight || source.naturalHeight || source.height;
  if (!srcW || !srcH) return null;

  let w = srcW;
  let h = srcH;
  const longest = Math.max(w, h);
  if (longest > IMAGE_MAX_DIM) {
    const scale = IMAGE_MAX_DIM / longest;
    // Clamp to 1px so an extreme aspect ratio cannot round a side down to 0,
    // which would make toDataURL() return an empty "data:," URL.
    w = Math.max(1, Math.round(w * scale));
    h = Math.max(1, Math.round(h * scale));
  }

  const canvas = els.cameraCanvas;
  canvas.width = w;
  canvas.height = h;
  const ctx = canvas.getContext("2d");
  if (!ctx) {
    addWsLog("system", "图片编码失败:无法获取 canvas 2D 上下文");
    return null;
  }

  let dataUrl;
  try {
    // JPEG has no alpha channel: paint a white background first so that
    // transparent regions of PNG sources do not come out black.
    ctx.fillStyle = "#ffffff";
    ctx.fillRect(0, 0, w, h);
    ctx.drawImage(source, 0, 0, w, h);
    dataUrl = canvas.toDataURL("image/jpeg", IMAGE_JPEG_QUALITY);
  } catch (err) {
    addWsLog("system", `图片编码失败:${err.message || err}`);
    return null;
  }
  return { dataUrl, mime: "image/jpeg", width: w, height: h };
}
// Remember the image that will be submitted and, when there is one, show it
// in the preview. Always refreshes the submit button afterwards.
function setPendingImage(payload) {
  state.pendingImage = payload;
  if (!payload) {
    setCameraButtonEnabled();
    return;
  }
  els.cameraPhoto.src = payload.dataUrl;
  setPreviewMode("photo");
  setCameraButtonEnabled();
}
// Re-read the list of video input devices into state.videoDevices; any
// enumeration failure leaves the list empty.
async function refreshVideoDevices() {
  let cameras;
  try {
    const allDevices = await navigator.mediaDevices.enumerateDevices();
    cameras = allDevices.filter(({ kind }) => kind === "videoinput");
  } catch (_) {
    cameras = [];
  }
  state.videoDevices = cameras;
}
// Rebuild the camera dropdown from state.videoDevices. Device labels are only
// exposed once camera permission has been granted; until then generic names
// ("摄像头 1", …) are shown, or a single disabled default entry when no
// devices are known.
function populateDeviceSelect(activeDeviceId) {
  const select = els.cameraDeviceSelect;
  select.innerHTML = "";

  const addOption = (value, label) => {
    const option = document.createElement("option");
    option.value = value;
    option.textContent = label;
    select.appendChild(option);
  };

  const cameras = state.videoDevices;
  if (cameras.length === 0) {
    addOption("", "默认摄像头");
    select.disabled = true;
    return;
  }

  cameras.forEach((camera, position) => {
    addOption(camera.deviceId, camera.label || `摄像头 ${position + 1}`);
  });
  select.disabled = false;
  if (activeDeviceId) {
    select.value = activeDeviceId;
  }
}
// Start (or restart) the live camera preview.
//   deviceId: optional exact device to open; without it the camera matching
//             state.cameraFacing is requested.
// The function is async and can be overtaken while it awaits: the drawer may
// close (which stops the camera) or a newer startCamera() call may begin. A
// stream that is no longer wanted is released instead of being left running.
async function startCamera(deviceId) {
  if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
    addWsLog("system", "该浏览器不支持摄像头访问");
    return;
  }
  stopCameraStream();
  const video = deviceId
    ? { deviceId: { exact: deviceId } }
    : { facingMode: state.cameraFacing };

  let stream;
  try {
    stream = await navigator.mediaDevices.getUserMedia({
      video,
      audio: false,
    });
  } catch (err) {
    addWsLog("system", `摄像头错误:${err.message || err}`);
    // stopCameraStream() above already switched the previous stream off, so
    // bring the preview and the submit button back in line with that state.
    if (!state.pendingImage) setPreviewMode("idle");
    setCameraButtonEnabled();
    return;
  }

  if (!state.cameraState) {
    // The camera drawer closed while the permission prompt was pending; do
    // not keep the camera running behind a hidden drawer.
    stream.getTracks().forEach((track) => track.stop());
    return;
  }
  // Release a stream that a concurrent startCamera() call installed meanwhile.
  stopCameraStream();
  state.cameraStream = stream;

  els.cameraVideo.srcObject = stream;
  try {
    await els.cameraVideo.play();
  } catch (_) {
    /* autoplay may resolve later */
  }
  // Stopped or superseded while waiting for playback: nothing more to do.
  if (state.cameraStream !== stream) return;

  state.cameraActive = true;
  state.pendingImage = null;
  setPreviewMode("camera");
  els.cameraStartBtn.classList.add("is-active");
  clearSampleSelection();

  // Device labels become available only after permission is granted; refresh
  // the dropdown now and select whichever camera is actually streaming.
  await refreshVideoDevices();
  if (state.cameraStream !== stream) return;
  const activeId =
    stream.getVideoTracks?.()[0]?.getSettings?.().deviceId || deviceId;
  populateDeviceSelect(activeId);
  // Reveal the camera device dropdown only while the camera is in use.
  els.cameraDeviceRow.hidden = false;
  setCameraButtonEnabled();
}
// Stop every track of the current camera stream (if any) and reset the
// camera-related UI: video element, start button highlight, device row.
function stopCameraStream() {
  const stream = state.cameraStream;
  if (stream) {
    for (const track of stream.getTracks()) {
      track.stop();
    }
    state.cameraStream = null;
  }
  els.cameraVideo.srcObject = null;
  state.cameraActive = false;
  els.cameraStartBtn.classList.remove("is-active");
  els.cameraDeviceRow.hidden = true;
}
// Grab the current video frame as a payload; on success the camera is stopped
// and the frame becomes the pending image. Returns the payload or null.
function captureFromCamera() {
  const frame = mediaToPayload(els.cameraVideo);
  if (frame) {
    stopCameraStream();
    setPendingImage(frame);
    return frame;
  }
  return null;
}
// Load a same-origin/object URL into an Image element. Resolves with the
// element once it has loaded and rejects with an Error if loading fails.
function loadImage(src) {
  return new Promise((resolve, reject) => {
    const element = new Image();
    const succeed = () => resolve(element);
    const fail = () => reject(new Error(`failed to load image: ${src}`));
    element.onload = succeed;
    element.onerror = fail;
    element.src = src;
  });
}
// Turn a user-picked file into the pending image. The temporary object URL is
// always revoked, whether or not loading/encoding succeeded.
async function selectFileImage(file) {
  if (!file) return;
  const blobUrl = URL.createObjectURL(file);
  try {
    const payload = mediaToPayload(await loadImage(blobUrl));
    if (payload) {
      stopCameraStream();
      clearSampleSelection();
      setPendingImage(payload);
    }
  } catch (err) {
    addWsLog("system", `上传错误:${err.message || err}`);
  } finally {
    URL.revokeObjectURL(blobUrl);
  }
}
// Load a sample image, make it the pending image and highlight its thumbnail
// button (when one is given). Load failures are reported in the WS log.
async function selectSampleImage(src, buttonEl) {
  let payload;
  try {
    payload = mediaToPayload(await loadImage(src));
    if (payload) {
      stopCameraStream();
      clearSampleSelection();
      if (buttonEl) {
        buttonEl.classList.add("is-selected");
      }
      setPendingImage(payload);
    }
  } catch (err) {
    addWsLog("system", `示例图加载错误:${err.message || err}`);
  }
}
// Remove the "selected" highlight from every sample thumbnail.
function clearSampleSelection() {
  const highlighted = els.cameraSamples.querySelectorAll(
    ".camera-drawer__sample.is-selected",
  );
  for (const thumb of highlighted) {
    thumb.classList.remove("is-selected");
  }
}
// Build the sample thumbnail buttons once; later calls are no-ops.
function renderSampleThumbnails() {
  if (state.samplesRendered) return;
  state.samplesRendered = true;

  const container = els.cameraSamples;
  container.innerHTML = "";
  SAMPLE_IMAGES.forEach(({ src, label }) => {
    const thumbButton = document.createElement("button");
    thumbButton.type = "button";
    thumbButton.className = "camera-drawer__sample";
    thumbButton.title = label;

    const thumb = document.createElement("img");
    thumb.src = src;
    thumb.alt = label;
    thumbButton.appendChild(thumb);

    thumbButton.addEventListener("click", () => selectSampleImage(src, thumbButton));
    container.appendChild(thumbButton);
  });
}
// Return the camera/image input to its initial state: no stream, no pending
// image, no highlighted sample, idle preview.
function resetCameraInput() {
  stopCameraStream();
  clearSampleSelection();
  state.pendingImage = null;
  els.cameraPhoto.removeAttribute("src");
  setPreviewMode("idle");
  // Must run last: it reads the state that was just cleared.
  setCameraButtonEnabled();
}
// Pre-select the first sample image so "拍摄完成" is immediately pressable when
// the drawer opens, without requiring the user to capture or pick first.
// Does nothing when an image is already pending or the camera is live.
function selectDefaultImage() {
  const alreadyHasSource = state.pendingImage || state.cameraActive;
  if (alreadyHasSource) return;

  const firstButton = els.cameraSamples.querySelector(".camera-drawer__sample");
  const firstSample = SAMPLE_IMAGES[0];
  if (!firstButton || !firstSample) return;
  selectSampleImage(firstSample.src, firstButton);
}
// Send an `input.image` message for `payload` with the given text (falling
// back to CAMERA_DONE_TEXT). Returns false when there is no payload or the
// socket is not open, true once the message has been handed to wsSend.
function sendImage(payload, text) {
  const socketOpen = state.ws && state.ws.readyState === WebSocket.OPEN;
  if (!payload || !socketOpen) return false;

  const caption = text || CAMERA_DONE_TEXT;
  wsSend(
    JSON.stringify({
      type: "input.image",
      image: payload.dataUrl,
      mime_type: payload.mime,
      width: payload.width,
      height: payload.height,
      text: caption,
      interrupt: true,
    }),
  );

  // Mirror the text-input path: interrupt in-flight bot audio and render the
  // user's image + text together as one local bubble (the engine does not echo
  // image input back as a transcript event).
  stopPlaybackQueue();
  state.currentAssistantBubble = null;
  addImageBubble("user", payload.dataUrl, caption);
  return true;
}
// Submit the current image: a fresh frame when the live camera is on,
// otherwise the already-selected (uploaded / sample / captured) image.
function submitCameraImage() {
  const previouslySelected = state.pendingImage;
  const payload = state.cameraActive
    ? captureFromCamera() || previouslySelected
    : previouslySelected;
  if (!payload) return;

  // Keep the existing workflow contract: the accompanying text stays the
  // "【拍摄完成】" marker that advances the FastGPT camera step; the image is
  // the new multimodal attachment.
  const sent = sendImage(payload, CAMERA_DONE_TEXT);
  if (sent) {
    resetCameraInput();
  }
}
/* --------------------------------------------------------- Chat updates */
function handleUserTranscript(text) {
@@ -864,7 +1191,6 @@ function handleAssistantFinal(text, interrupted) {
if (interrupted) {
state.currentAssistantBubble.classList.add("bubble--interrupted");
}
updateCameraQuestion(text);
state.currentAssistantBubble = null;
scrollChatToBottom();
}
@@ -930,16 +1256,16 @@ async function connect() {
const chatId = inputChatId || generateChatId();
const url = wsUrlWithChatId(chatId);
if (!url) {
setStatus("error", "Missing URL");
setStatus("error", "缺少服务器地址");
return;
}
state.connecting = true;
state.chatId = chatId;
els.chatId.value = chatId;
setStatus("connecting", "Connecting…");
setStatus("connecting", "连接中…");
setConnectButton();
addWsLog("system", `connecting ${url}`);
addWsLog("system", `正在连接 ${url}`);
try {
// Pre-warm audio context on user gesture so playback works on Safari.
@@ -949,9 +1275,9 @@ async function connect() {
state.connecting = false;
state.chatId = "";
if (!inputChatId) els.chatId.value = "";
setStatus("error", "Audio init failed");
setStatus("error", "音频初始化失败");
setConnectButton();
addWsLog("error", `audio init failed: ${err.message || err}`, "error");
addWsLog("error", `音频初始化失败:${err.message || err}`, "error");
return;
}
@@ -963,9 +1289,9 @@ async function connect() {
state.connecting = false;
state.chatId = "";
if (!inputChatId) els.chatId.value = "";
setStatus("error", "Bad URL");
setStatus("error", "服务器地址无效");
setConnectButton();
addWsLog("error", `bad websocket URL: ${err.message || err}`, "error");
addWsLog("error", `WebSocket 地址无效:${err.message || err}`, "error");
return;
}
ws.binaryType = "arraybuffer";
@@ -986,15 +1312,15 @@ async function connect() {
state.connecting = false;
state.connected = true;
resetPlaybackClock();
addWsLog("system", "websocket open");
setStatus("connected", "Connected");
addWsLog("system", "连接已建立");
setStatus("connected", "已连接");
setConnectButton();
setMicButton();
setMicSelectEnabled();
refreshMicDevices();
wsSend(JSON.stringify(startMessage));
addBubble("system", "Session started.");
addBubble("system", "会话已开始。");
setComposerEnabled(true);
setCameraButtonEnabled();
els.textInput.focus();
@@ -1026,7 +1352,7 @@ async function connect() {
ws.addEventListener("error", (err) => {
console.error("WebSocket error", err);
setStatus("error", "Connection error");
setStatus("error", "连接错误");
addWsLog("error", "websocket error", "error");
});
@@ -1055,11 +1381,11 @@ async function connect() {
if (wasConnected) {
addBubble(
"system",
`Session ended${event.reason ? `${event.reason}` : ""}.`,
`会话已结束${event.reason ? `${event.reason}` : ""}`,
);
setStatus("idle", "Disconnected");
setStatus("idle", "未连接");
} else {
setStatus("error", "Connection closed");
setStatus("error", "连接已断开");
}
});
}
@@ -1101,7 +1427,7 @@ els.micBtn.addEventListener("click", async () => {
}
} catch (err) {
console.error("Mic error", err);
addBubble("system", `Mic error: ${err.message || err}`);
addBubble("system", `麦克风错误:${err.message || err}`);
} finally {
els.micBtn.disabled = !state.connected;
}
@@ -1118,7 +1444,7 @@ els.micSelect.addEventListener("change", async () => {
await startMic();
} catch (err) {
console.error("Mic switch error", err);
addBubble("system", `Mic switch error: ${err.message || err}`);
addBubble("system", `麦克风切换错误:${err.message || err}`);
} finally {
setMicButton();
setMicSelectEnabled();
@@ -1139,7 +1465,25 @@ els.clearWsLogBtn.addEventListener("click", () => {
els.cameraDoneBtn.addEventListener("click", () => {
if (!state.cameraState) return;
sendText(CAMERA_DONE_TEXT);
submitCameraImage();
});
// Start the camera with whichever device is currently chosen in the camera
// picker; an empty selection is forwarded as `undefined`.
els.cameraStartBtn.addEventListener("click", () => {
  const chosenDeviceId = els.cameraDeviceSelect.value || undefined;
  startCamera(chosenDeviceId);
});
// React to a different camera being picked in the device dropdown.
els.cameraDeviceSelect.addEventListener("change", () => {
  // Nothing to restart while the camera is off: the new choice is read from
  // the dropdown the next time the "use camera" button ("使用摄像头") is pressed.
  if (!state.cameraActive) return;

  // Camera is already live, so restart the stream on the newly chosen device.
  const chosenDeviceId = els.cameraDeviceSelect.value || undefined;
  startCamera(chosenDeviceId);
});
// Hand the first file chosen in the upload input to selectFileImage().
els.cameraUpload.addEventListener("change", (event) => {
  const input = event.target;
  // `files` may be missing/empty; in that case the falsy value is passed on
  // unchanged and selectFileImage() is still called.
  const picked = input.files && input.files[0];
  selectFileImage(picked);
  // Reset the input afterwards — presumably so that picking the same file
  // again still fires a "change" event.
  input.value = "";
});
function autosizeTextarea() {
@@ -1174,6 +1518,7 @@ els.textInput.addEventListener("keydown", (event) => {
});
window.addEventListener("beforeunload", () => {
stopCameraStream();
if (state.ws) {
try {
state.ws.close();
@@ -1192,7 +1537,7 @@ window.addEventListener("beforeunload", () => {
els.url.value = defaultWsUrl();
setStatus("idle", "Disconnected");
setStatus("idle", "未连接");
setConnectButton();
setMicButton();
setMicSelectEnabled();

View File

@@ -1,5 +1,5 @@
<!doctype html>
<html lang="en">
<html lang="zh-CN">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
@@ -16,7 +16,7 @@
<div class="connection">
<label class="connection__field">
<span>WebSocket URL</span>
<span>服务器地址</span>
<input
id="ws-url"
type="text"
@@ -26,12 +26,12 @@
/>
</label>
<label class="connection__field connection__field--chat">
<span>Chat ID</span>
<span>会话 ID</span>
<div class="chat-id-control">
<input
id="chat-id"
type="text"
placeholder="optional chatId"
placeholder="可选"
spellcheck="false"
autocomplete="off"
/>
@@ -40,8 +40,8 @@
class="chat-id-control__copy"
type="button"
disabled
title="Copy Chat ID"
aria-label="Copy Chat ID"
title="复制会话 ID"
aria-label="复制会话 ID"
>
<svg class="copy-icon copy-icon--default" viewBox="0 0 16 16" width="14" height="14" fill="none" aria-hidden="true">
<rect x="5" y="5" width="8" height="9" rx="1.5" stroke="currentColor" stroke-width="1.4"/>
@@ -54,13 +54,13 @@
</div>
</label>
<button id="connect-btn" class="btn btn--primary" type="button">
Connect
连接
</button>
</div>
<div class="status">
<span id="status-dot" class="status__dot status__dot--idle"></span>
<span id="status-text" class="status__text">Disconnected</span>
<span id="status-text" class="status__text">未连接</span>
</div>
</header>
@@ -70,28 +70,87 @@
<aside
id="camera-drawer"
class="camera-drawer"
aria-label="Camera capture step"
aria-label="拍照步骤"
aria-hidden="true"
>
<div class="camera-drawer__panel">
<div class="camera-drawer__header">
<div>
<p class="camera-drawer__eyebrow">Camera</p>
<p class="camera-drawer__eyebrow">拍照</p>
<h2>拍照步骤</h2>
</div>
<span id="camera-state" class="camera-drawer__state">State -</span>
<span id="camera-state" class="camera-drawer__state">状态 -</span>
</div>
<div class="camera-drawer__preview" aria-hidden="true">
<div id="camera-preview" class="camera-drawer__preview">
<video
id="camera-video"
class="camera-drawer__video"
playsinline
muted
autoplay
></video>
<img
id="camera-photo"
class="camera-drawer__photo"
alt="已选择图片预览"
/>
<span class="camera-drawer__corner camera-drawer__corner--tl"></span>
<span class="camera-drawer__corner camera-drawer__corner--tr"></span>
<span class="camera-drawer__corner camera-drawer__corner--bl"></span>
<span class="camera-drawer__corner camera-drawer__corner--br"></span>
<span class="camera-drawer__lens"></span>
<span class="camera-drawer__scan"></span>
<span id="camera-placeholder" class="camera-drawer__placeholder">
打开摄像头实时拍摄,或从下方选择 / 上传图片
</span>
</div>
<p id="camera-question" class="camera-drawer__question"></p>
<div
id="camera-samples"
class="camera-drawer__samples"
aria-label="示例图片,点击选择"
></div>
<div class="camera-drawer__sources">
<label
class="btn btn--ghost camera-drawer__source"
>
上传图片
<input
id="camera-upload"
type="file"
accept="image/*"
hidden
/>
</label>
<button
id="camera-start-btn"
class="btn btn--ghost camera-drawer__source"
type="button"
title="打开摄像头"
>
使用摄像头
</button>
</div>
<label
id="camera-device-row"
class="device-picker camera-drawer__device-row"
hidden
>
<span class="device-picker__label">选择摄像头</span>
<select
id="camera-device-select"
class="device-picker__select"
disabled
>
<option value="">默认摄像头</option>
</select>
</label>
<button
id="camera-done-btn"
class="btn btn--primary camera-drawer__button"
@@ -100,23 +159,24 @@
>
拍摄完成
</button>
<canvas id="camera-canvas" hidden></canvas>
</div>
</aside>
<section class="chat" aria-label="Conversation history">
<section class="chat" aria-label="对话记录">
<div id="chat-log" class="chat__log" role="log" aria-live="polite">
<div class="chat__empty">
<p>Connect to the engine, enable your mic, and start talking.</p>
<p>连接服务、开启麦克风后即可开始对话。</p>
<p class="chat__hint">
Audio is streamed as PCM16 mono @ 16&nbsp;kHz over
<code>/ws-product</code>.
音频通过 <code>/ws-product</code> PCM16 单声道 16&nbsp;kHz
传输。
</p>
</div>
</div>
</section>
</div>
<footer class="controls" aria-label="Chat controls">
<footer class="controls" aria-label="操作栏">
<div class="meter" aria-hidden="true">
<div id="meter-fill" class="meter__fill"></div>
</div>
@@ -126,7 +186,7 @@
id="text-input"
class="composer__input"
rows="1"
placeholder="Type a message, or use the mic…"
placeholder="输入消息,或使用麦克风…"
disabled
></textarea>
<button
@@ -134,17 +194,17 @@
class="btn btn--primary composer__send"
type="submit"
disabled
title="Send message (Enter)"
title="发送消息 (Enter)"
>
Send
发送
</button>
</form>
<div class="controls__row">
<label class="device-picker">
<span class="device-picker__label">Microphone</span>
<span class="device-picker__label">麦克风</span>
<select id="mic-select" class="device-picker__select" disabled>
<option value="">Default microphone</option>
<option value="">默认麦克风</option>
</select>
</label>
@@ -154,7 +214,7 @@
type="button"
disabled
aria-pressed="false"
title="Mic is off"
title="麦克风已关闭"
>
<svg
class="mic-btn__icon"
@@ -172,52 +232,52 @@
fill="currentColor"
/>
</svg>
<span class="mic-btn__label">Enable mic</span>
<span class="mic-btn__label">开启麦克风</span>
</button>
<div class="indicators">
<span id="mic-indicator" class="indicator">
<span class="indicator__dot indicator__dot--mic"></span>
<span class="indicator__label">Mic</span>
<span class="indicator__label">麦克风</span>
</span>
<span id="bot-indicator" class="indicator">
<span class="indicator__dot indicator__dot--bot"></span>
<span class="indicator__label">Bot</span>
<span class="indicator__label">助手</span>
</span>
<span id="state-indicator" class="indicator indicator--state">
<span class="indicator__dot indicator__dot--state"></span>
<span id="state-label" class="indicator__label">State -</span>
<span id="state-label" class="indicator__label">状态 -</span>
</span>
</div>
<button id="clear-btn" class="btn btn--ghost" type="button">
Clear
清空
</button>
</div>
<p class="hint">
Press <kbd>Enter</kbd> to send, <kbd>Shift</kbd>+<kbd>Enter</kbd>
for newline. Sending text will interrupt the bot if it's speaking.
Browser echo cancellation is on; use headphones if echo persists.
<kbd>Enter</kbd> 发送,<kbd>Shift</kbd>+<kbd>Enter</kbd>
换行。发送文字会打断正在说话的助手。
浏览器回声消除已开启,如有回音请使用耳机。
</p>
</footer>
</div>
<section class="ws-log" aria-label="WebSocket log">
<section class="ws-log" aria-label="WebSocket 日志">
<div class="ws-log__header">
<div class="ws-log__header-left">
<h2>WebSocket Log</h2>
<h2>WebSocket 日志</h2>
<div class="ws-log__legend" aria-hidden="true">
<span class="ws-log__legend-item ws-log__legend-item--send">Send</span>
<span class="ws-log__legend-item ws-log__legend-item--recv">Recv</span>
<span class="ws-log__legend-item ws-log__legend-item--send">发送</span>
<span class="ws-log__legend-item ws-log__legend-item--recv">接收</span>
</div>
</div>
<button id="clear-ws-log-btn" class="btn btn--ghost" type="button">
Clear log
清空日志
</button>
</div>
<div id="ws-log" class="ws-log__body" role="log" aria-live="polite">
<div class="ws-log__empty">No websocket events yet.</div>
<div class="ws-log__empty">暂无 WebSocket 事件。</div>
</div>
</section>
</div>

BIN
static/voice-demo/samples/.DS_Store vendored Normal file

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 273 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 323 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 229 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 72 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 105 KiB

View File

@@ -136,7 +136,8 @@ body {
.camera-drawer__preview {
position: relative;
min-height: 210px;
aspect-ratio: 4 / 3;
min-height: 200px;
overflow: hidden;
border: 1px solid rgba(149, 160, 187, 0.28);
border-radius: 14px;
@@ -148,6 +149,49 @@ body {
background-size: 34px 34px, 34px 34px, auto, auto;
}
/* Live video and still-photo layers: both fill the whole preview box and are
   hidden by default. They sit at z-index 1; the corner marks and placeholder
   use z-index 2. */
.camera-drawer__video,
.camera-drawer__photo {
  position: absolute;
  inset: 0;
  width: 100%;
  height: 100%;
  object-fit: cover;
  display: none;
  z-index: 1;
}
/* The photo overrides the shared `cover` fit so the whole image stays visible
   (letterboxed) instead of being cropped. Must stay after the shared rule. */
.camera-drawer__photo {
  object-fit: contain;
}
/* State classes on the preview container pick which layer is shown. */
.camera-drawer__preview.is-camera .camera-drawer__video {
  display: block;
}
.camera-drawer__preview.is-photo .camera-drawer__photo {
  display: block;
}
/* Hide the decorative lens/scan/placeholder once real media is showing. */
.camera-drawer__preview.is-camera .camera-drawer__lens,
.camera-drawer__preview.is-photo .camera-drawer__lens,
.camera-drawer__preview.is-camera .camera-drawer__scan,
.camera-drawer__preview.is-photo .camera-drawer__scan,
.camera-drawer__preview.is-camera .camera-drawer__placeholder,
.camera-drawer__preview.is-photo .camera-drawer__placeholder {
  display: none;
}
/* Hint text pinned to the bottom edge of the preview (18px side insets,
   16px from the bottom), centred and drawn above the media layers. */
.camera-drawer__placeholder {
  z-index: 2;
  position: absolute;
  inset: auto 18px 16px;
  text-align: center;
  font-size: 12px;
  line-height: 1.5;
  color: rgba(214, 220, 235, 0.78);
}
.camera-drawer__lens {
position: absolute;
top: 50%;
@@ -174,6 +218,7 @@ body {
.camera-drawer__corner {
position: absolute;
z-index: 2;
width: 28px;
height: 28px;
border-color: rgba(255, 255, 255, 0.7);
@@ -229,6 +274,87 @@ body {
cursor: not-allowed;
}
/* The "upload image" (上传图片) and "use camera" (使用摄像头) controls share
   one flex row with an 8px gap. */
.camera-drawer__sources {
  display: flex;
  gap: 8px;
}
/* The camera device dropdown only appears after "use camera" (使用摄像头) is
   selected. Lifts any max-width inherited from other rules on this element. */
.camera-drawer__device-row {
  max-width: none;
}
/* Explicitly honour the `hidden` attribute. NOTE(review): presumably needed
   because another class on this element sets `display` — confirm against
   the .device-picker rule. */
.camera-drawer__device-row[hidden] {
  display: none;
}
/* Base look shared by both source controls: equal-width flex items with
   centred label text. */
.camera-drawer__source {
  flex: 1 1 0;
  display: inline-flex;
  align-items: center;
  justify-content: center;
  text-align: center;
  min-height: 38px;
  font-size: 13px;
  font-weight: 600;
  cursor: pointer;
}
/* Active state for the "use camera" (使用摄像头) button once the camera is
   live. Declared once, after the base rule; an identical duplicate of this
   rule that used to precede the base rule was removed. */
.camera-drawer__source.is-active {
  border-color: var(--success);
  color: var(--success);
}
/* Dimmed, non-interactive look while the control is disabled. */
.camera-drawer__source:disabled {
  opacity: 0.5;
  cursor: not-allowed;
}
/* Sample-image picker: a four-column thumbnail grid. */
.camera-drawer__samples {
  display: grid;
  gap: 8px;
  grid-template-columns: repeat(4, 1fr);
}
/* Collapse the grid entirely when it has no thumbnails. */
.camera-drawer__samples:empty {
  display: none;
}
/* One thumbnail tile: 4:3 box with a transparent border that the hover and
   selected states recolour without shifting layout. */
.camera-drawer__sample {
  position: relative;
  overflow: hidden;
  aspect-ratio: 4 / 3;
  padding: 0;
  background: #0f141f;
  border: 2px solid transparent;
  border-radius: 10px;
  cursor: pointer;
}
/* Show the whole sample image inside the tile (letterboxed, not cropped). */
.camera-drawer__sample img {
  display: block;
  width: 100%;
  height: 100%;
  object-fit: contain;
}
.camera-drawer__sample:hover {
  border-color: rgba(149, 160, 187, 0.6);
}
/* Selected tile: success-coloured border plus a 1px outer ring. */
.camera-drawer__sample.is-selected {
  box-shadow: 0 0 0 1px var(--success);
  border-color: var(--success);
}
.app__body {
display: grid;
grid-template-columns: minmax(0, 1fr) clamp(300px, 32vw, 420px);
@@ -511,6 +637,18 @@ body {
margin-bottom: 4px;
}
/* Image shown inside a chat bubble: block-level, capped at 240px wide. */
.bubble__image {
  display: block;
  width: 100%;
  max-width: 240px;
  margin-bottom: 6px;
  border-radius: 10px;
}
/* Drop the text element that directly follows an image when it is empty. */
.bubble__image + .bubble__text:empty {
  display: none;
}
/* WebSocket log --------------------------------------------------------- */
.ws-log {
@@ -567,8 +705,8 @@ body {
margin: 0;
font-size: 12px;
color: var(--text-dim);
text-transform: uppercase;
letter-spacing: 0.8px;
letter-spacing: 0.5px;
white-space: nowrap;
}
.ws-log__header-left {
@@ -823,11 +961,7 @@ body {
outline: none;
width: 100%;
cursor: pointer;
}
.device-picker__select:focus {
border-color: var(--accent);
box-shadow: 0 0 0 3px rgba(79, 140, 255, 0.18);
text-overflow: ellipsis;
}
.device-picker__select:disabled {
@@ -835,6 +969,11 @@ body {
cursor: not-allowed;
}
/* Focus ring for the device dropdown: accent border plus a soft 3px glow. */
.device-picker__select:focus {
  box-shadow: 0 0 0 3px rgba(79, 140, 255, 0.18);
  border-color: var(--accent);
}
.mic-btn {
display: inline-flex;
align-items: center;