Implement WS v1 protocol and runtime-config powered debug drawer

This commit is contained in:
Xin Wang
2026-02-09 08:19:39 +08:00
parent 0fc56e2685
commit fb6d1eb1da
13 changed files with 986 additions and 298 deletions

View File

@@ -36,7 +36,7 @@ def generate_sine_wave(duration_ms=1000):
return audio_data
async def receive_loop(ws):
async def receive_loop(ws, ready_event: asyncio.Event):
"""Listen for incoming messages from the server."""
print("👂 Listening for server responses...")
async for msg in ws:
@@ -45,8 +45,10 @@ async def receive_loop(ws):
if msg.type == aiohttp.WSMsgType.TEXT:
try:
data = json.loads(msg.data)
event_type = data.get('event', 'Unknown')
event_type = data.get('type', 'Unknown')
print(f"[{timestamp}] 📨 Event: {event_type} | {msg.data[:150]}...")
if event_type == "session.started":
ready_event.set()
except json.JSONDecodeError:
print(f"[{timestamp}] 📨 Text: {msg.data[:100]}...")
@@ -118,35 +120,43 @@ async def run_client(url, file_path=None, use_sine=False):
print(f"🔌 Connecting to {url}...")
async with session.ws_connect(url) as ws:
print("✅ Connected!")
session_ready = asyncio.Event()
recv_task = asyncio.create_task(receive_loop(ws, session_ready))
# Send initial invite command
init_cmd = {
"command": "invite",
"option": {
"codec": "pcm",
"samplerate": SAMPLE_RATE
# Send v1 hello + session.start handshake
await ws.send_json({"type": "hello", "version": "v1"})
await ws.send_json({
"type": "session.start",
"audio": {
"encoding": "pcm_s16le",
"sample_rate_hz": SAMPLE_RATE,
"channels": 1
}
}
await ws.send_json(init_cmd)
print("📤 Sent Invite Command")
})
print("📤 Sent v1 hello/session.start")
await asyncio.wait_for(session_ready.wait(), timeout=8)
# Select sender based on args
if use_sine:
sender_task = send_sine_loop(ws)
await send_sine_loop(ws)
elif file_path:
sender_task = send_file_loop(ws, file_path)
await send_file_loop(ws, file_path)
else:
# Default to sine wave
sender_task = send_sine_loop(ws)
await send_sine_loop(ws)
# Run send and receive loops in parallel
await asyncio.gather(
receive_loop(ws),
sender_task
)
await ws.send_json({"type": "session.stop", "reason": "test_complete"})
await asyncio.sleep(1)
recv_task.cancel()
try:
await recv_task
except asyncio.CancelledError:
pass
except aiohttp.ClientConnectorError:
print(f"❌ Connection Failed. Is the server running at {url}?")
except asyncio.TimeoutError:
print("❌ Timeout waiting for session.started")
except Exception as e:
print(f"❌ Error: {e}")
finally:

View File

@@ -547,7 +547,7 @@
setStatus(true, "Session open");
logLine("sys", "WebSocket connected");
ensureAudioContext();
sendCommand({ command: "invite", option: { codec: "pcm", sampleRate: targetSampleRate } });
sendCommand({ type: "hello", version: "v1" });
};
ws.onclose = () => {
@@ -574,7 +574,10 @@
}
function disconnect() {
if (ws) ws.close();
if (ws && ws.readyState === WebSocket.OPEN) {
sendCommand({ type: "session.stop", reason: "client_disconnect" });
ws.close();
}
ws = null;
setStatus(false, "Disconnected");
}
@@ -585,40 +588,48 @@
return;
}
ws.send(JSON.stringify(cmd));
logLine("sys", `${cmd.command}`, cmd);
logLine("sys", `${cmd.type}`, cmd);
}
function handleEvent(event) {
const type = event.event || "unknown";
const type = event.type || "unknown";
logLine("event", type, event);
if (type === "transcript") {
if (event.isFinal && event.text) {
if (type === "hello.ack") {
sendCommand({
type: "session.start",
audio: { encoding: "pcm_s16le", sample_rate_hz: targetSampleRate, channels: 1 },
});
}
if (type === "transcript.final") {
if (event.text) {
setInterim("You", "");
addChat("You", event.text);
} else if (event.text) {
interimUserText += event.text;
setInterim("You", interimUserText);
}
}
if (type === "llmResponse") {
if (event.isFinal && event.text) {
if (type === "transcript.delta" && event.text) {
setInterim("You", event.text);
}
if (type === "assistant.response.final") {
if (event.text) {
setInterim("AI", "");
addChat("AI", event.text);
} else if (event.text) {
interimAiText += event.text;
setInterim("AI", interimAiText);
}
}
if (type === "trackStart") {
if (type === "assistant.response.delta" && event.text) {
interimAiText += event.text;
setInterim("AI", interimAiText);
}
if (type === "output.audio.start") {
// New bot audio: stop any previous playback to avoid overlap
stopPlayback();
discardAudio = false;
interimAiText = "";
}
if (type === "speaking") {
if (type === "input.speech_started") {
// User started speaking: clear any in-flight audio to avoid overlap
stopPlayback();
}
if (type === "interrupt") {
if (type === "response.interrupted") {
stopPlayback();
}
}
@@ -716,7 +727,7 @@
if (!text) return;
ensureAudioContext();
addChat("You", text);
sendCommand({ command: "chat", text });
sendCommand({ type: "input.text", text });
chatInput.value = "";
});
clearLogBtn.addEventListener("click", () => {