Remove Invite button; correct streaming ASR/TTS transcription

This commit is contained in:
Xin Wang
2026-02-06 11:20:52 +08:00
parent cb35d87eb4
commit 960690ba80

View File

@@ -229,6 +229,11 @@
border-left: 3px solid var(--good);
}
/* Interim chat entries (the "interim" class is added by setInterim in the script):
   rendered dimmed and in italics so they read differently from regular entries. */
.chat-entry.interim {
opacity: 0.7;
font-style: italic;
}
.log-entry {
padding: 6px 8px;
border-bottom: 1px dashed rgba(255, 255, 255, 0.06);
@@ -306,7 +311,6 @@
<div class="btn-row">
<button class="accent" id="connectBtn">Connect</button>
<button class="secondary" id="disconnectBtn">Disconnect</button>
<button class="secondary" id="inviteBtn">Invite</button>
</div>
<div class="status">
<div id="statusDot" class="dot"></div>
@@ -366,7 +370,6 @@
// References to page controls, looked up by element id.
// NOTE(review): the commit title says the invite button is removed, so the
// inviteBtn lookup below is presumably the removed side of this diff — confirm.
const wsUrl = document.getElementById("wsUrl");
const connectBtn = document.getElementById("connectBtn");
const disconnectBtn = document.getElementById("disconnectBtn");
const inviteBtn = document.getElementById("inviteBtn");
const inputSelect = document.getElementById("inputSelect");
const outputSelect = document.getElementById("outputSelect");
const startMicBtn = document.getElementById("startMicBtn");
@@ -390,6 +393,9 @@
// NOTE(review): playbackDest is not used in the visible excerpt — purpose to be confirmed.
let playbackDest = null;
// Audio-context time at which the most recently scheduled buffer ends; the next
// buffer starts no earlier than this. Reset to the context's current time by
// the trackStart, speaking and interrupt events.
let playbackTime = 0;
// Set to true by the speaking/interrupt events and back to false by trackStart.
// Presumably read by the audio-receive path to drop incoming audio — confirm.
let discardAudio = false;
// Buffer sources that have been started and have not yet fired `onended`;
// kept so they can be stopped when the speaking/interrupt events arrive.
let playbackSources = [];
// Chat elements currently showing interim text for the user and for the AI;
// null when no interim entry is on screen. Managed by setInterim.
let interimUserEl = null;
let interimAiEl = null;
// Sample rate sent as `sampleRate` in the invite command (presumably Hz — confirm).
const targetSampleRate = 16000;
@@ -424,6 +430,26 @@
chatHistory.scrollTop = chatHistory.scrollHeight;
}
/**
 * Show, update, or clear the interim chat entry for one speaker.
 *
 * The role "AI" selects the AI slot (interimAiEl); any other role selects the
 * user slot (interimUserEl). A falsy `text` removes the slot's element, if
 * any, and clears the slot. Otherwise the element is created on first use,
 * appended to chatHistory, and its text is replaced with the new content.
 *
 * @param {string} role - Speaker label; also used as the prefix of the entry text.
 * @param {string} text - Interim text to show; falsy to clear the entry.
 */
function setInterim(role, text) {
  const forAi = role === "AI";

  // Write the chosen speaker's slot; the two slots are independent.
  const remember = (node) => {
    if (forAi) {
      interimAiEl = node;
    } else {
      interimUserEl = node;
    }
  };

  let entry = forAi ? interimAiEl : interimUserEl;

  // Clearing: drop the element (when present) and reset the slot.
  if (!text) {
    if (entry) {
      entry.remove();
    }
    remember(null);
    return;
  }

  // First interim text for this speaker: create and attach the entry.
  if (!entry) {
    entry = document.createElement("div");
    entry.className = `chat-entry ${forAi ? "ai" : "user"} interim`;
    chatHistory.appendChild(entry);
    remember(entry);
  }

  entry.textContent = `${role} (interim): ${text}`;
  // Keep the newest content in view.
  chatHistory.scrollTop = chatHistory.scrollHeight;
}
function setStatus(connected, detail) {
statusDot.classList.toggle("on", connected);
statusText.textContent = connected ? "Connected" : "Disconnected";
@@ -491,6 +517,10 @@
const startTime = Math.max(audioCtx.currentTime + 0.02, playbackTime);
source.start(startTime);
playbackTime = startTime + buffer.duration;
playbackSources.push(source);
source.onended = () => {
playbackSources = playbackSources.filter((s) => s !== source);
};
}
async function connect() {
@@ -546,18 +576,46 @@
// Handles one event object: passes it to logLine, updates the chat transcript
// for transcript/llmResponse events, and resets or cancels audio playback for
// trackStart/speaking/interrupt events. The type is read from `event.event`
// and falls back to "unknown" when that field is missing.
// NOTE(review): this listing is a diff view with the +/- markers stripped. The
// two one-line `... && event.isFinal && event.text` conditions (each followed by
// a lone addChat call) appear to be the removed pre-change lines, and the
// nested blocks right after them their replacements — confirm against the
// committed file before editing.
function handleEvent(event) {
const type = event.event || "unknown";
logLine("event", type, event);
if (type === "transcript" && event.isFinal && event.text) {
addChat("You", event.text);
// Transcript events: a final result with text clears the "You" interim entry
// and is added to the chat; a non-final result with text updates the interim entry.
if (type === "transcript") {
if (event.isFinal && event.text) {
setInterim("You", "");
addChat("You", event.text);
} else if (event.text) {
setInterim("You", event.text);
}
}
if (type === "llmResponse" && event.isFinal && event.text) {
addChat("AI", event.text);
// llmResponse events: same final/interim handling as above, for the "AI" entry.
if (type === "llmResponse") {
if (event.isFinal && event.text) {
setInterim("AI", "");
addChat("AI", event.text);
} else if (event.text) {
setInterim("AI", event.text);
}
}
// trackStart: clear the discard flag and restart the playback clock at the
// audio context's current time (0 when no context exists).
if (type === "trackStart") {
discardAudio = false;
playbackTime = audioCtx ? audioCtx.currentTime : 0;
}
if (type === "speaking") {
// User started speaking: clear any in-flight audio to avoid overlap
discardAudio = true;
playbackTime = audioCtx ? audioCtx.currentTime : 0;
playbackSources.forEach((s) => {
try {
s.stop();
// Errors from stop() are ignored (presumably for sources that already ended — confirm).
} catch (err) {}
});
playbackSources = [];
}
// interrupt: same teardown as "speaking" — set the discard flag, reset the
// playback clock, stop every tracked source and empty the list.
if (type === "interrupt") {
discardAudio = true;
playbackTime = audioCtx ? audioCtx.currentTime : 0;
playbackSources.forEach((s) => {
try {
s.stop();
} catch (err) {}
});
playbackSources = [];
}
}
@@ -647,9 +705,6 @@
await requestDeviceAccess();
await refreshDevices();
});
// Invite button: sends an "invite" command requesting the "pcm" codec at targetSampleRate.
// NOTE(review): the commit title says the invite button is removed, so this
// listener is presumably the removed side of the diff — confirm.
inviteBtn.addEventListener("click", () => {
sendCommand({ command: "invite", option: { codec: "pcm", sampleRate: targetSampleRate } });
});
// Microphone buttons call startMic / stopMic directly as click handlers.
startMicBtn.addEventListener("click", startMic);
stopMicBtn.addEventListener("click", stopMic);
sendChatBtn.addEventListener("click", () => {