fix legacy protocol in wav client

This commit is contained in:
Xin Wang
2026-02-17 13:18:00 +08:00
parent 30eb4397c2
commit 01c0de0a4d

View File

@@ -105,6 +105,7 @@ class WavFileClient:
self.track_started = False
self.track_ended = False
self.send_completed = False
self.session_ready = False
# Events log
self.events_log = []
@@ -131,13 +132,18 @@ class WavFileClient:
self.ws = await websockets.connect(self.url)
self.running = True
self.log_event("", "Connected!")
# Send invite command
# WS v1 handshake: hello -> session.start
await self.send_command({
"command": "invite",
"option": {
"codec": "pcm",
"sampleRate": self.sample_rate
"type": "hello",
"version": "v1",
})
await self.send_command({
"type": "session.start",
"audio": {
"encoding": "pcm_s16le",
"sample_rate_hz": self.sample_rate,
"channels": 1
}
})
@@ -145,12 +151,12 @@ class WavFileClient:
"""Send JSON command to server."""
if self.ws:
await self.ws.send(json.dumps(cmd))
self.log_event("", f"Command: {cmd.get('command', 'unknown')}")
self.log_event("", f"Command: {cmd.get('type', 'unknown')}")
async def send_hangup(self, reason: str = "Session complete") -> None:
    """Ask the server to end the session using the WS v1 protocol.

    Sends a single ``session.stop`` message through ``self.send_command``.
    The legacy ``"command": "hangup"`` key is intentionally not included:
    v1 messages are identified by their ``"type"`` field only.

    Args:
        reason: Text placed in the message's ``"reason"`` field.
    """
    await self.send_command({
        "type": "session.stop",
        "reason": reason,
    })
@@ -277,51 +283,50 @@ class WavFileClient:
async def _handle_event(self, event: dict) -> None:
    """Dispatch one decoded server event (WS v1 protocol).

    The event kind is read from the ``"type"`` key; a missing key is
    treated as ``"unknown"`` and falls through to the generic log branch.
    Depending on the kind this updates ``session_ready``, ``track_started``,
    ``track_ended``, ``response_start_time``, ``waiting_for_first_audio``
    or ``running`` on the instance and reports progress via
    ``self.log_event``.

    Args:
        event: Decoded JSON message received from the server.
    """
    event_type = event.get("type", "unknown")

    if event_type == "hello.ack":
        self.log_event("", "Handshake acknowledged")
    elif event_type == "session.started":
        # The sender waits on this flag before streaming audio.
        self.session_ready = True
        self.log_event("", "Session ready!")
    elif event_type == "input.speech_started":
        self.log_event("", "Speech detected")
    elif event_type == "input.speech_stopped":
        self.log_event("", "Silence detected")
    elif event_type == "transcript.delta":
        # Interim transcript: overwrite the same console line
        # (carriage return, no newline) with a truncated preview.
        text = event.get("text", "")
        display_text = text[:60] + "..." if len(text) > 60 else text
        print(f" [listening] {display_text}".ljust(80), end="\r")
    elif event_type == "transcript.final":
        text = event.get("text", "")
        # Blank out the interim line before logging the final text.
        print(" " * 80, end="\r")
        self.log_event("", f"→ You: {text}")
    elif event_type == "metrics.ttfb":
        latency_ms = event.get("latencyMs", 0)
        self.log_event("", f"[TTFB] Server latency: {latency_ms}ms")
    elif event_type == "assistant.response.delta":
        # Streaming chunks are only shown in verbose mode.
        text = event.get("text", "")
        if self.verbose and text:
            self.log_event("", f"LLM: {text}")
    elif event_type == "assistant.response.final":
        text = event.get("text", "")
        if text:
            self.log_event("", f"LLM Response (final): {text[:100]}{'...' if len(text) > 100 else ''}")
    elif event_type == "output.audio.start":
        self.track_started = True
        self.response_start_time = time.time()
        self.waiting_for_first_audio = True
        self.log_event("", "Bot started speaking")
    elif event_type == "output.audio.end":
        self.track_ended = True
        self.log_event("", "Bot finished speaking")
    elif event_type == "response.interrupted":
        self.log_event("", "Bot interrupted!")
    elif event_type == "error":
        self.log_event("!", f"Error: {event.get('message')}")
    elif event_type == "session.stopped":
        self.log_event("", f"Session stopped: {event.get('reason')}")
        # Clearing this flag signals the rest of the client to stop.
        self.running = False
    else:
        self.log_event("", f"Event: {event_type}")
@@ -359,11 +364,15 @@ class WavFileClient:
# Connect to server
await self.connect()
# Wait for answer
await asyncio.sleep(0.5)
# Start receiver task
receiver_task = asyncio.create_task(self.receiver())
# Wait for session.started before streaming audio
ready_start = time.time()
while self.running and not self.session_ready:
if time.time() - ready_start > 8.0:
raise TimeoutError("Timeout waiting for session.started")
await asyncio.sleep(0.05)
# Send audio
await self.audio_sender(audio_data)