Remove invite button; correct streaming ASR/TTS transcription

This commit is contained in:
Xin Wang
2026-02-06 11:20:52 +08:00
parent cb35d87eb4
commit 960690ba80

View File

@@ -229,6 +229,11 @@
border-left: 3px solid var(--good); border-left: 3px solid var(--good);
} }
/* Chat entries that also carry the `interim` class are drawn at 70% opacity in italics. */
.chat-entry.interim {
opacity: 0.7;
font-style: italic;
}
.log-entry { .log-entry {
padding: 6px 8px; padding: 6px 8px;
border-bottom: 1px dashed rgba(255, 255, 255, 0.06); border-bottom: 1px dashed rgba(255, 255, 255, 0.06);
@@ -306,7 +311,6 @@
<div class="btn-row"> <div class="btn-row">
<button class="accent" id="connectBtn">Connect</button> <button class="accent" id="connectBtn">Connect</button>
<button class="secondary" id="disconnectBtn">Disconnect</button> <button class="secondary" id="disconnectBtn">Disconnect</button>
<button class="secondary" id="inviteBtn">Invite</button>
</div> </div>
<div class="status"> <div class="status">
<div id="statusDot" class="dot"></div> <div id="statusDot" class="dot"></div>
@@ -366,7 +370,6 @@
const wsUrl = document.getElementById("wsUrl"); const wsUrl = document.getElementById("wsUrl");
const connectBtn = document.getElementById("connectBtn"); const connectBtn = document.getElementById("connectBtn");
const disconnectBtn = document.getElementById("disconnectBtn"); const disconnectBtn = document.getElementById("disconnectBtn");
const inviteBtn = document.getElementById("inviteBtn");
const inputSelect = document.getElementById("inputSelect"); const inputSelect = document.getElementById("inputSelect");
const outputSelect = document.getElementById("outputSelect"); const outputSelect = document.getElementById("outputSelect");
const startMicBtn = document.getElementById("startMicBtn"); const startMicBtn = document.getElementById("startMicBtn");
@@ -390,6 +393,9 @@
let playbackDest = null; let playbackDest = null;
let playbackTime = 0; let playbackTime = 0;
let discardAudio = false; let discardAudio = false;
let playbackSources = [];
let interimUserEl = null;
let interimAiEl = null;
const targetSampleRate = 16000; const targetSampleRate = 16000;
@@ -424,6 +430,26 @@
chatHistory.scrollTop = chatHistory.scrollHeight; chatHistory.scrollTop = chatHistory.scrollHeight;
} }
/**
 * Show, refresh, or clear the provisional ("interim") chat line for one speaker.
 *
 * One element is tracked per speaker: `interimAiEl` when `role` is exactly
 * "AI", `interimUserEl` for every other role.
 *
 * @param {string} role - Speaker label; also used verbatim as the line prefix.
 * @param {string} text - Provisional text. A falsy value removes the line.
 * @returns {void}
 */
function setInterim(role, text) {
  const forAi = role === "AI";
  const existing = forAi ? interimAiEl : interimUserEl;

  // Write the tracked element back into whichever module variable owns it.
  const track = (node) => {
    if (forAi) {
      interimAiEl = node;
    } else {
      interimUserEl = node;
    }
  };

  // Nothing to show: drop the line (if any) and forget it. No scrolling here.
  if (!text) {
    if (existing) {
      existing.remove();
    }
    track(null);
    return;
  }

  let line = existing;
  if (!line) {
    // First provisional text for this speaker: create and attach the line.
    line = document.createElement("div");
    line.className = `chat-entry ${forAi ? "ai" : "user"} interim`;
    chatHistory.appendChild(line);
    track(line);
  }

  line.textContent = `${role} (interim): ${text}`;
  // Pin the view to the bottom so the updated line stays visible.
  chatHistory.scrollTop = chatHistory.scrollHeight;
}
function setStatus(connected, detail) { function setStatus(connected, detail) {
statusDot.classList.toggle("on", connected); statusDot.classList.toggle("on", connected);
statusText.textContent = connected ? "Connected" : "Disconnected"; statusText.textContent = connected ? "Connected" : "Disconnected";
@@ -491,6 +517,10 @@
const startTime = Math.max(audioCtx.currentTime + 0.02, playbackTime); const startTime = Math.max(audioCtx.currentTime + 0.02, playbackTime);
source.start(startTime); source.start(startTime);
playbackTime = startTime + buffer.duration; playbackTime = startTime + buffer.duration;
playbackSources.push(source);
source.onended = () => {
playbackSources = playbackSources.filter((s) => s !== source);
};
} }
async function connect() { async function connect() {
@@ -546,18 +576,46 @@
// NOTE(review): this span is a side-by-side diff rendering. Where a line holds
// two statements, the left one is the pre-change text and the right one is the
// post-change text; lines holding a single statement exist only post-change.
// The comments below describe the post-change column.
//
// handleEvent reacts to one decoded event object:
//   - the event name is read from `event.event`, falling back to "unknown";
//   - every event is passed to logLine before any type-specific handling;
//   - "transcript" / "llmResponse" update the chat for "You" / "AI";
//   - "trackStart" re-enables playback; "speaking" / "interrupt" cancel it.
function handleEvent(event) { function handleEvent(event) {
const type = event.event || "unknown"; const type = event.event || "unknown";
logLine("event", type, event); logLine("event", type, event);
// Final text replaces the interim line with a permanent chat entry; non-final
// text only refreshes the interim line.
// NOTE(review): a final event with empty text matches neither branch, so an
// existing interim line is left in place — confirm this is intended.
if (type === "transcript" && event.isFinal && event.text) { if (type === "transcript") {
addChat("You", event.text); if (event.isFinal && event.text) {
setInterim("You", "");
addChat("You", event.text);
} else if (event.text) {
setInterim("You", event.text);
}
} }
// Same final/interim handling as "transcript", for the "AI" speaker.
if (type === "llmResponse" && event.isFinal && event.text) { if (type === "llmResponse") {
addChat("AI", event.text); if (event.isFinal && event.text) {
setInterim("AI", "");
addChat("AI", event.text);
} else if (event.text) {
setInterim("AI", event.text);
}
} }
// Clear the discard flag and restart the playback clock from the audio
// context's current time (0 when no context exists).
if (type === "trackStart") { if (type === "trackStart") {
discardAudio = false; discardAudio = false;
playbackTime = audioCtx ? audioCtx.currentTime : 0; playbackTime = audioCtx ? audioCtx.currentTime : 0;
} }
if (type === "speaking") {
// User started speaking: clear any in-flight audio to avoid overlap
discardAudio = true;
playbackTime = audioCtx ? audioCtx.currentTime : 0;
// Stop every tracked source; errors thrown by stop() are deliberately ignored.
playbackSources.forEach((s) => {
try {
s.stop();
} catch (err) {}
});
playbackSources = [];
}
// Post-change, "interrupt" performs the same reset-and-stop sequence as
// "speaking" above (the two branches are duplicated, not shared).
if (type === "interrupt") { if (type === "interrupt") {
discardAudio = true; discardAudio = true;
playbackTime = audioCtx ? audioCtx.currentTime : 0;
playbackSources.forEach((s) => {
try {
s.stop();
} catch (err) {}
});
playbackSources = [];
} }
} }
@@ -647,9 +705,6 @@
await requestDeviceAccess(); await requestDeviceAccess();
await refreshDevices(); await refreshDevices();
}); });
inviteBtn.addEventListener("click", () => {
sendCommand({ command: "invite", option: { codec: "pcm", sampleRate: targetSampleRate } });
});
startMicBtn.addEventListener("click", startMic); startMicBtn.addEventListener("click", startMic);
stopMicBtn.addEventListener("click", stopMic); stopMicBtn.addEventListener("click", stopMic);
sendChatBtn.addEventListener("click", () => { sendChatBtn.addEventListener("click", () => {