Add camera functionality to voice demo with UI updates and state management
- Introduced a camera drawer for capturing images during the conversation flow.
- Added prompts for the various camera states to guide users through the photo capture process.
- Updated the HTML structure to include camera-related elements and integrated them with the existing chat functionality.
- Enhanced the JavaScript logic to manage camera state and to enable/disable the camera button based on connection status.
- Updated the CSS to style the camera drawer and its components, ensuring a responsive design across devices.
- Adjusted the README to reflect the new demo URL for the voice demo.
This commit is contained in:
@@ -51,7 +51,7 @@ examples/webpage/
|
||||
2. Open the demo page served by the same process:
|
||||
|
||||
```text
|
||||
http://127.0.0.1:8000/demo/
|
||||
http://127.0.0.1:8000/voice-demo/
|
||||
```
|
||||
|
||||
The default websocket URL is derived from the page host
|
||||
@@ -63,7 +63,7 @@ examples/webpage/
|
||||
```json
|
||||
"server": {
|
||||
"serve_webpage": true,
|
||||
"webpage_mount": "/demo"
|
||||
"webpage_mount": "/voice-demo"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
@@ -23,6 +23,19 @@ const WS_LOG_GROUP_KEYS = {
|
||||
TEXT_DELTA: "recv:response.text.delta",
|
||||
AUDIO_SEND: "send:input.audio",
|
||||
};
|
||||
/** Text passed to sendText() when the user presses the camera drawer's "done" button. */
const CAMERA_DONE_TEXT = "【拍摄完成】";

/**
 * Prompt shown in the camera drawer for each assistant state that asks the
 * user to take a photo. Keys are assistant state codes; a state without an
 * entry keeps the drawer closed.
 *
 * The table has no prototype and is frozen, so a lookup with arbitrary state
 * text (e.g. "toString" or "constructor") can never resolve to an inherited
 * member, and the prompts cannot be modified at runtime.
 */
const CAMERA_STATE_PROMPTS = Object.freeze(
  Object.assign(Object.create(null), {
    2000: "请对准车辆碰撞部位拍摄照片。", // photograph the vehicle's collision area
    2001: "请对准车辆碰撞部位拍摄照片。", // same prompt as 2000
    2002: "请对准被撞物品拍摄照片。", // photograph the object that was hit
    2003: "请切换摄像头对准本人拍摄一张正面照片。", // switch camera, frontal photo of the user
    2010: "请对准第一辆车碰撞部位拍摄。", // first vehicle's collision area
    2011: "请对准第一辆车碰撞部位拍摄。", // same prompt as 2010
    2012: "请对准第二辆车碰撞部位拍摄。", // second vehicle's collision area
    2013: "请对准第二方车辆侧后方,看清车牌拍摄。", // rear side of the other vehicle, plate legible
    2014: "请拍摄另一方驾驶人的正面照片。", // frontal photo of the other driver
    2015: "请切换前置摄像头对准本人拍摄一张正面照片。", // switch to front camera, frontal photo of the user
  }),
);
|
||||
|
||||
function defaultWsUrl() {
|
||||
const scheme = location.protocol === "https:" ? "wss:" : "ws:";
|
||||
@@ -34,6 +47,7 @@ const els = {
|
||||
connectBtn: document.getElementById("connect-btn"),
|
||||
statusDot: document.getElementById("status-dot"),
|
||||
statusText: document.getElementById("status-text"),
|
||||
conversation: document.getElementById("conversation"),
|
||||
chatLog: document.getElementById("chat-log"),
|
||||
micBtn: document.getElementById("mic-btn"),
|
||||
micSelect: document.getElementById("mic-select"),
|
||||
@@ -42,6 +56,10 @@ const els = {
|
||||
botIndicator: document.getElementById("bot-indicator"),
|
||||
stateIndicator: document.getElementById("state-indicator"),
|
||||
stateLabel: document.getElementById("state-label"),
|
||||
cameraDrawer: document.getElementById("camera-drawer"),
|
||||
cameraState: document.getElementById("camera-state"),
|
||||
cameraQuestion: document.getElementById("camera-question"),
|
||||
cameraDoneBtn: document.getElementById("camera-done-btn"),
|
||||
clearBtn: document.getElementById("clear-btn"),
|
||||
clearWsLogBtn: document.getElementById("clear-ws-log-btn"),
|
||||
wsLog: document.getElementById("ws-log"),
|
||||
@@ -75,6 +93,7 @@ const state = {
|
||||
// Chat state.
|
||||
currentAssistantBubble: null,
|
||||
assistantState: "",
|
||||
cameraState: "",
|
||||
|
||||
// VU meter smoothing.
|
||||
meterLevel: 0,
|
||||
@@ -121,6 +140,7 @@ function setMicSelectEnabled() {
|
||||
function setComposerEnabled(enabled) {
|
||||
els.textInput.disabled = !enabled;
|
||||
els.sendBtn.disabled = !enabled || els.textInput.value.trim().length === 0;
|
||||
setCameraButtonEnabled();
|
||||
}
|
||||
|
||||
function setBotIndicator(active) {
|
||||
@@ -134,6 +154,37 @@ function setAssistantState(value) {
|
||||
els.stateIndicator.classList.toggle("is-active", Boolean(text));
|
||||
els.stateLabel.textContent = label ? `State ${label}` : "State -";
|
||||
els.stateIndicator.title = label ? `Assistant state: ${text}` : "Assistant state";
|
||||
syncCameraDrawer(text);
|
||||
}
|
||||
|
||||
/**
 * Recomputes the disabled flag of the camera "done" button.
 *
 * The button is enabled only while the session is marked connected, a camera
 * state is active, and the websocket exists and is in the OPEN ready state.
 * Does nothing when the button element is absent.
 */
function setCameraButtonEnabled() {
  const button = els.cameraDoneBtn;
  if (!button) {
    return;
  }

  const ready =
    Boolean(state.connected) &&
    Boolean(state.cameraState) &&
    Boolean(state.ws) &&
    state.ws.readyState === WebSocket.OPEN;

  button.disabled = !ready;
}
|
||||
|
||||
/**
 * Opens or closes the camera drawer to match the given assistant state.
 *
 * The drawer opens only when `value` is one of CAMERA_STATE_PROMPTS' own keys
 * and maps to a non-empty string prompt. For any other value the drawer is
 * closed, its labels are reset and the tracked camera state is cleared.
 * The camera button's enabled state is refreshed in both cases.
 *
 * @param {*} value - Assistant state forwarded by setAssistantState()
 *   (presumably a string; verify against the caller).
 */
function syncCameraDrawer(value) {
  // Own-property check: a bare `CAMERA_STATE_PROMPTS[value]` also matches
  // inherited members such as "toString" or "constructor", which would open
  // the drawer and render a function as the prompt text.
  const isKnownState =
    value != null &&
    Object.prototype.hasOwnProperty.call(CAMERA_STATE_PROMPTS, value);
  const prompt = isKnownState ? CAMERA_STATE_PROMPTS[value] : "";
  const open = typeof prompt === "string" && prompt !== "";

  state.cameraState = open ? value : "";

  els.cameraDrawer.classList.toggle("is-open", open);
  els.conversation.classList.toggle("has-camera", open);
  els.cameraDrawer.setAttribute("aria-hidden", open ? "false" : "true");

  if (open) {
    els.cameraState.textContent = `State ${value}`;
    els.cameraQuestion.textContent = prompt;
  } else {
    els.cameraState.textContent = "State -";
    els.cameraQuestion.textContent = "";
  }

  // The button depends on state.cameraState, so refresh it after every change.
  setCameraButtonEnabled();
}
|
||||
|
||||
/**
 * Replaces the camera drawer's question with the given text.
 *
 * Ignored unless a camera state is active and `text` is a string that is
 * non-empty after trimming; the trimmed text is what gets displayed.
 *
 * @param {*} text - Candidate question text.
 */
function updateCameraQuestion(text) {
  if (typeof text !== "string") {
    return;
  }

  const question = text.trim();
  if (question === "" || !state.cameraState) {
    return;
  }

  els.cameraQuestion.textContent = question;
}
|
||||
|
||||
function addBubble(role, text) {
|
||||
@@ -761,6 +812,7 @@ function handleAssistantFinal(text, interrupted) {
|
||||
if (interrupted) {
|
||||
state.currentAssistantBubble.classList.add("bubble--interrupted");
|
||||
}
|
||||
updateCameraQuestion(text);
|
||||
state.currentAssistantBubble = null;
|
||||
scrollChatToBottom();
|
||||
}
|
||||
@@ -883,6 +935,7 @@ async function connect() {
|
||||
wsSend(JSON.stringify(startMessage));
|
||||
addBubble("system", "Session started.");
|
||||
setComposerEnabled(true);
|
||||
setCameraButtonEnabled();
|
||||
els.textInput.focus();
|
||||
});
|
||||
|
||||
@@ -928,6 +981,7 @@ async function connect() {
|
||||
setMicButton();
|
||||
setMicSelectEnabled();
|
||||
setComposerEnabled(false);
|
||||
setCameraButtonEnabled();
|
||||
setBotIndicator(false);
|
||||
finalizeWsLogGroup();
|
||||
addWsLog(
|
||||
@@ -1019,6 +1073,11 @@ els.clearWsLogBtn.addEventListener("click", () => {
|
||||
clearWsLog();
|
||||
});
|
||||
|
||||
// Camera drawer confirmation: while a camera state is active, a click sends
// CAMERA_DONE_TEXT through sendText().
// The registration is guarded because setCameraButtonEnabled() already treats
// the button as optional; without the guard a page lacking the camera markup
// would throw here during script start-up.
if (els.cameraDoneBtn) {
  els.cameraDoneBtn.addEventListener("click", () => {
    if (!state.cameraState) return;
    sendText(CAMERA_DONE_TEXT);
  });
}
|
||||
|
||||
function autosizeTextarea() {
|
||||
const ta = els.textInput;
|
||||
ta.style.height = "auto";
|
||||
|
||||
@@ -38,17 +38,55 @@
|
||||
|
||||
<div class="app__body">
|
||||
<div class="app__main">
|
||||
<section class="chat" aria-label="Conversation history">
|
||||
<div id="chat-log" class="chat__log" role="log" aria-live="polite">
|
||||
<div class="chat__empty">
|
||||
<p>Connect to the engine, enable your mic, and start talking.</p>
|
||||
<p class="chat__hint">
|
||||
Audio is streamed as PCM16 mono @ 16 kHz over
|
||||
<code>/ws-product</code>.
|
||||
</p>
|
||||
<div id="conversation" class="conversation">
|
||||
<aside
|
||||
id="camera-drawer"
|
||||
class="camera-drawer"
|
||||
aria-label="Camera capture step"
|
||||
aria-hidden="true"
|
||||
>
|
||||
<div class="camera-drawer__panel">
|
||||
<div class="camera-drawer__header">
|
||||
<div>
|
||||
<p class="camera-drawer__eyebrow">Camera</p>
|
||||
<h2>拍照步骤</h2>
|
||||
</div>
|
||||
<span id="camera-state" class="camera-drawer__state">State -</span>
|
||||
</div>
|
||||
|
||||
<div class="camera-drawer__preview" aria-hidden="true">
|
||||
<span class="camera-drawer__corner camera-drawer__corner--tl"></span>
|
||||
<span class="camera-drawer__corner camera-drawer__corner--tr"></span>
|
||||
<span class="camera-drawer__corner camera-drawer__corner--bl"></span>
|
||||
<span class="camera-drawer__corner camera-drawer__corner--br"></span>
|
||||
<span class="camera-drawer__lens"></span>
|
||||
<span class="camera-drawer__scan"></span>
|
||||
</div>
|
||||
|
||||
<p id="camera-question" class="camera-drawer__question"></p>
|
||||
<button
|
||||
id="camera-done-btn"
|
||||
class="btn btn--primary camera-drawer__button"
|
||||
type="button"
|
||||
disabled
|
||||
>
|
||||
拍摄完成
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</aside>
|
||||
|
||||
<section class="chat" aria-label="Conversation history">
|
||||
<div id="chat-log" class="chat__log" role="log" aria-live="polite">
|
||||
<div class="chat__empty">
|
||||
<p>Connect to the engine, enable your mic, and start talking.</p>
|
||||
<p class="chat__hint">
|
||||
Audio is streamed as PCM16 mono @ 16 kHz over
|
||||
<code>/ws-product</code>.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
|
||||
<footer class="controls" aria-label="Chat controls">
|
||||
<div class="meter" aria-hidden="true">
|
||||
|
||||
@@ -62,6 +62,173 @@ body {
|
||||
padding: 14px;
|
||||
}
|
||||
|
||||
/* Camera drawer --------------------------------------------------------- */

/* Closed by default: collapsed to zero width, transparent and
   non-interactive. The open geometry comes from the --camera-drawer-*
   custom properties, with fallbacks when they are not set. */
.camera-drawer {
  flex: 0 0 var(--camera-drawer-width, min(34%, 430px));
  max-width: 0;
  min-width: 0;
  min-height: 0;
  overflow: hidden;
  pointer-events: none;
  opacity: 0;
  transform: translateX(-18px);
  transition:
    max-width 340ms cubic-bezier(0.22, 1, 0.36, 1),
    opacity 180ms ease,
    transform 340ms cubic-bezier(0.22, 1, 0.36, 1);
  will-change: max-width, opacity, transform;
}

/* Open state: expands to the configured max width and accepts input. */
.camera-drawer.is-open {
  max-width: var(--camera-drawer-max-width, 430px);
  pointer-events: auto;
  transform: translateX(0);
  opacity: 1;
}

/* Respect the user's reduced-motion preference: switch between the closed
   and open states instantly instead of sliding and fading. */
@media (prefers-reduced-motion: reduce) {
  .camera-drawer {
    transition: none;
  }
}

/* Panel: vertical stack that scrolls on its own. The min-width keeps the
   content from reflowing while the drawer's max-width animates. */
.camera-drawer__panel {
  display: flex;
  flex-direction: column;
  gap: 16px;
  height: 100%;
  min-width: 280px;
  padding: 18px;
  overflow-y: auto;
  background:
    linear-gradient(180deg, rgba(26, 32, 48, 0.98), rgba(11, 13, 18, 0.98)),
    var(--bg-elevated);
  /* Only the right edge is drawn: the widths below zero out the other sides. */
  border: 1px solid rgba(106, 161, 255, 0.35);
  border-width: 0 1px 0 0;
  border-radius: 0;
  box-shadow: none;
}

/* Header row: title block on the left, state badge on the right. */
.camera-drawer__header {
  display: flex;
  align-items: flex-start;
  justify-content: space-between;
  gap: 12px;
}

.camera-drawer__eyebrow {
  margin: 0 0 4px;
  color: var(--text-dim);
  font-size: 11px;
  letter-spacing: 0.8px;
  text-transform: uppercase;
}

.camera-drawer__header h2 {
  margin: 0;
  font-size: 16px;
  font-weight: 650;
}

/* Pill-shaped badge showing the current state code. */
.camera-drawer__state {
  flex-shrink: 0;
  padding: 5px 8px;
  border: 1px solid rgba(255, 184, 77, 0.34);
  border-radius: 999px;
  background: rgba(255, 184, 77, 0.12);
  color: #ffd18a;
  font-size: 11px;
  font-weight: 700;
}

/* Decorative viewfinder: a 34px grid (two line gradients) over a radial
   glow on a dark base colour. */
.camera-drawer__preview {
  position: relative;
  min-height: 210px;
  overflow: hidden;
  border: 1px solid rgba(149, 160, 187, 0.28);
  border-radius: 14px;
  background:
    linear-gradient(rgba(255, 255, 255, 0.035) 1px, transparent 1px),
    linear-gradient(90deg, rgba(255, 255, 255, 0.035) 1px, transparent 1px),
    radial-gradient(circle at center, rgba(79, 140, 255, 0.22), transparent 42%),
    #0f141f;
  background-size: 34px 34px, 34px 34px, auto, auto;
}

/* Centered lens ring. */
.camera-drawer__lens {
  position: absolute;
  top: 50%;
  left: 50%;
  width: 92px;
  height: 92px;
  border: 2px solid rgba(230, 233, 242, 0.38);
  border-radius: 50%;
  transform: translate(-50%, -50%);
  box-shadow:
    inset 0 0 0 14px rgba(79, 140, 255, 0.1),
    0 0 36px rgba(79, 140, 255, 0.18);
}

/* Horizontal line across the middle of the preview. */
.camera-drawer__scan {
  position: absolute;
  left: 18px;
  right: 18px;
  top: 50%;
  height: 1px;
  background: linear-gradient(90deg, transparent, var(--success), transparent);
  box-shadow: 0 0 12px rgba(45, 210, 139, 0.8);
}

/* Corner brackets: shared box here, each modifier draws two edges. */
.camera-drawer__corner {
  position: absolute;
  width: 28px;
  height: 28px;
  border-color: rgba(255, 255, 255, 0.7);
}

.camera-drawer__corner--tl {
  top: 16px;
  left: 16px;
  border-top: 2px solid;
  border-left: 2px solid;
}

.camera-drawer__corner--tr {
  top: 16px;
  right: 16px;
  border-top: 2px solid;
  border-right: 2px solid;
}

.camera-drawer__corner--bl {
  bottom: 16px;
  left: 16px;
  border-bottom: 2px solid;
  border-left: 2px solid;
}

.camera-drawer__corner--br {
  right: 16px;
  bottom: 16px;
  border-right: 2px solid;
  border-bottom: 2px solid;
}

.camera-drawer__question {
  margin: 0;
  color: var(--text);
  font-size: 17px;
  font-weight: 600;
  line-height: 1.5;
}

/* Full-width action button; the auto top margin pushes it to the bottom of
   the panel's column. */
.camera-drawer__button {
  width: 100%;
  margin-top: auto;
  min-height: 48px;
  border-radius: 12px;
  font-size: 15px;
  font-weight: 700;
}

.camera-drawer__button:disabled {
  opacity: 0.55;
  cursor: not-allowed;
}
|
||||
|
||||
.app__body {
|
||||
display: grid;
|
||||
grid-template-columns: minmax(0, 1fr) clamp(300px, 32vw, 420px);
|
||||
@@ -80,6 +247,17 @@ body {
|
||||
border-radius: var(--radius);
|
||||
}
|
||||
|
||||
/* Flex container for the conversation area; clips overflowing children. */
.conversation {
  display: flex;
  overflow: hidden;
  min-height: 0;
}

/* Drawer geometry while the camera step is active; read by .camera-drawer
   through var(--camera-drawer-width) and var(--camera-drawer-max-width). */
.conversation.has-camera {
  --camera-drawer-max-width: 430px;
  --camera-drawer-width: min(34%, 430px);
}
|
||||
|
||||
/* Header ---------------------------------------------------------------- */
|
||||
|
||||
.app__header {
|
||||
@@ -187,9 +365,11 @@ body {
|
||||
/* Chat ------------------------------------------------------------------ */
|
||||
|
||||
/* Chat column: takes the remaining flex space and is allowed to shrink
   (zero min sizes) so its content is clipped rather than overflowing. */
.chat {
  display: flex;
  flex-direction: column;
  flex: 1 1 0;
  min-width: 0;
  min-height: 0;
  overflow: hidden;
}
|
||||
|
||||
@@ -205,6 +385,7 @@ body {
|
||||
|
||||
/* Empty-state message: centered via auto margins, capped at 460px wide. */
.chat__empty {
  width: min(100%, 460px);
  margin: auto;
  color: var(--text-dim);
  text-align: center;
}
|
||||
@@ -773,6 +954,11 @@ body {
|
||||
/* Responsive ------------------------------------------------------------ */
|
||||
|
||||
@media (max-width: 820px) {
|
||||
/* At widths up to 820px the drawer takes 46% of the row, capped at 420px. */
.conversation.has-camera {
  --camera-drawer-max-width: 420px;
  --camera-drawer-width: 46%;
}
|
||||
|
||||
.app__body {
|
||||
grid-template-columns: 1fr;
|
||||
grid-template-rows: minmax(0, 1fr) min(240px, 32vh);
|
||||
@@ -800,6 +986,24 @@ body {
|
||||
padding: 10px;
|
||||
}
|
||||
|
||||
/* Compact camera drawer overrides. NOTE(review): the enclosing media query
   is not visible in this hunk — confirm the breakpoint these apply to. */

/* Drawer takes 58% of the row, but always leaves 120px of viewport width. */
.conversation.has-camera {
  --camera-drawer-max-width: calc(100vw - 120px);
  --camera-drawer-width: 58%;
}

/* Tighter panel: smaller minimum width and padding. */
.camera-drawer__panel {
  padding: 16px;
  min-width: 220px;
}

/* Shorter viewfinder. */
.camera-drawer__preview {
  min-height: 150px;
}

/* Slightly smaller prompt text. */
.camera-drawer__question {
  font-size: 16px;
}

/* Header collapses to a single column. */
.app__header {
  grid-template-columns: 1fr;
}
|
||||
|
||||
Reference in New Issue
Block a user