Fix bug occurring during long runs
This commit is contained in:
@@ -17,6 +17,7 @@ import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
import threading
|
||||
import queue
|
||||
from pathlib import Path
|
||||
@@ -92,6 +93,14 @@ class MicrophoneClient:
|
||||
# State
|
||||
self.is_recording = True
|
||||
self.is_playing = True
|
||||
|
||||
# TTFB tracking (Time to First Byte)
|
||||
self.request_start_time = None
|
||||
self.first_audio_received = False
|
||||
|
||||
# Interrupt handling - discard audio until next trackStart
|
||||
self._discard_audio = False
|
||||
self._audio_sequence = 0 # Track audio sequence to detect stale chunks
|
||||
|
||||
async def connect(self) -> None:
|
||||
"""Connect to WebSocket server."""
|
||||
@@ -117,6 +126,10 @@ class MicrophoneClient:
|
||||
|
||||
async def send_chat(self, text: str) -> None:
|
||||
"""Send chat message (text input)."""
|
||||
# Reset TTFB tracking for new request
|
||||
self.request_start_time = time.time()
|
||||
self.first_audio_received = False
|
||||
|
||||
await self.send_command({
|
||||
"command": "chat",
|
||||
"text": text
|
||||
@@ -236,9 +249,21 @@ class MicrophoneClient:
|
||||
# Audio data received
|
||||
self.bytes_received += len(message)
|
||||
|
||||
# Check if we should discard this audio (after interrupt)
|
||||
if self._discard_audio:
|
||||
duration_ms = len(message) / (self.sample_rate * 2) * 1000
|
||||
print(f"← Audio: {duration_ms:.0f}ms (DISCARDED - waiting for new track)")
|
||||
continue
|
||||
|
||||
if self.is_playing:
|
||||
self._add_audio_to_buffer(message)
|
||||
|
||||
# Calculate and display TTFB for first audio packet
|
||||
if not self.first_audio_received and self.request_start_time:
|
||||
client_ttfb_ms = (time.time() - self.request_start_time) * 1000
|
||||
self.first_audio_received = True
|
||||
print(f"← [TTFB] Client first audio latency: {client_ttfb_ms:.0f}ms")
|
||||
|
||||
# Show progress (less verbose)
|
||||
with self.audio_output_lock:
|
||||
buffer_ms = len(self.audio_output_buffer) / (self.sample_rate * 2) * 1000
|
||||
@@ -285,20 +310,36 @@ class MicrophoneClient:
|
||||
# Interim result - show with indicator (overwrite same line)
|
||||
display_text = text[:60] + "..." if len(text) > 60 else text
|
||||
print(f" [listening] {display_text}".ljust(80), end="\r")
|
||||
elif event_type == "ttfb":
|
||||
# Server-side TTFB event
|
||||
latency_ms = event.get("latencyMs", 0)
|
||||
print(f"← [TTFB] Server reported latency: {latency_ms}ms")
|
||||
elif event_type == "trackStart":
|
||||
print("← Bot started speaking")
|
||||
# IMPORTANT: Accept audio again after trackStart
|
||||
self._discard_audio = False
|
||||
self._audio_sequence += 1
|
||||
# Reset TTFB tracking for voice responses (when no chat was sent)
|
||||
if self.request_start_time is None:
|
||||
self.request_start_time = time.time()
|
||||
self.first_audio_received = False
|
||||
# Clear any old audio in buffer
|
||||
with self.audio_output_lock:
|
||||
self.audio_output_buffer = b""
|
||||
elif event_type == "trackEnd":
|
||||
print("← Bot finished speaking")
|
||||
# Reset TTFB tracking after response completes
|
||||
self.request_start_time = None
|
||||
self.first_audio_received = False
|
||||
elif event_type == "interrupt":
|
||||
print("← Bot interrupted!")
|
||||
# IMPORTANT: Clear audio buffer immediately on interrupt
|
||||
# IMPORTANT: Discard all audio until next trackStart
|
||||
self._discard_audio = True
|
||||
# Clear audio buffer immediately
|
||||
with self.audio_output_lock:
|
||||
buffer_ms = len(self.audio_output_buffer) / (self.sample_rate * 2) * 1000
|
||||
self.audio_output_buffer = b""
|
||||
print(f" (cleared {buffer_ms:.0f}ms of buffered audio)")
|
||||
print(f" (cleared {buffer_ms:.0f}ms, discarding audio until new track)")
|
||||
elif event_type == "error":
|
||||
print(f"← Error: {event.get('error')}")
|
||||
elif event_type == "hangup":
|
||||
|
||||
@@ -12,6 +12,7 @@ import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
import wave
|
||||
import io
|
||||
|
||||
@@ -67,6 +68,13 @@ class SimpleVoiceClient:
|
||||
|
||||
# Stats
|
||||
self.bytes_received = 0
|
||||
|
||||
# TTFB tracking (Time to First Byte)
|
||||
self.request_start_time = None
|
||||
self.first_audio_received = False
|
||||
|
||||
# Interrupt handling - discard audio until next trackStart
|
||||
self._discard_audio = False
|
||||
|
||||
async def connect(self):
|
||||
"""Connect to server."""
|
||||
@@ -84,6 +92,10 @@ class SimpleVoiceClient:
|
||||
|
||||
async def send_chat(self, text: str):
|
||||
"""Send chat message."""
|
||||
# Reset TTFB tracking for new request
|
||||
self.request_start_time = time.time()
|
||||
self.first_audio_received = False
|
||||
|
||||
await self.ws.send(json.dumps({"command": "chat", "text": text}))
|
||||
print(f"-> chat: {text}")
|
||||
|
||||
@@ -120,6 +132,18 @@ class SimpleVoiceClient:
|
||||
# Audio data
|
||||
self.bytes_received += len(msg)
|
||||
duration_ms = len(msg) / (self.sample_rate * 2) * 1000
|
||||
|
||||
# Check if we should discard this audio (after interrupt)
|
||||
if self._discard_audio:
|
||||
print(f"<- audio: {len(msg)} bytes ({duration_ms:.0f}ms) [DISCARDED]")
|
||||
continue
|
||||
|
||||
# Calculate and display TTFB for first audio packet
|
||||
if not self.first_audio_received and self.request_start_time:
|
||||
client_ttfb_ms = (time.time() - self.request_start_time) * 1000
|
||||
self.first_audio_received = True
|
||||
print(f"<- [TTFB] Client first audio latency: {client_ttfb_ms:.0f}ms")
|
||||
|
||||
print(f"<- audio: {len(msg)} bytes ({duration_ms:.0f}ms)")
|
||||
|
||||
# Play immediately in executor to not block
|
||||
@@ -138,6 +162,18 @@ class SimpleVoiceClient:
|
||||
print(f"<- You said: {text}")
|
||||
else:
|
||||
print(f"<- [listening] {text}", end="\r")
|
||||
elif etype == "ttfb":
|
||||
# Server-side TTFB event
|
||||
latency_ms = event.get("latencyMs", 0)
|
||||
print(f"<- [TTFB] Server reported latency: {latency_ms}ms")
|
||||
elif etype == "trackStart":
|
||||
# New track starting - accept audio again
|
||||
self._discard_audio = False
|
||||
print(f"<- {etype}")
|
||||
elif etype == "interrupt":
|
||||
# Interrupt - discard audio until next trackStart
|
||||
self._discard_audio = True
|
||||
print(f"<- {etype} (discarding audio until new track)")
|
||||
elif etype == "hangup":
|
||||
print(f"<- {etype}")
|
||||
self.running = False
|
||||
|
||||
166
examples/test_websocket.py
Normal file
166
examples/test_websocket.py
Normal file
@@ -0,0 +1,166 @@
|
||||
"""WebSocket endpoint test client.
|
||||
|
||||
Tests the /ws endpoint with sine wave or file audio streaming.
|
||||
Based on reference/py-active-call/exec/test_ws_endpoint/test_ws.py
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import json
|
||||
import struct
|
||||
import math
|
||||
import argparse
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
# Configuration
SERVER_URL = "ws://localhost:8000/ws"
SAMPLE_RATE = 16000
FREQUENCY = 440  # 440Hz Sine Wave
CHUNK_DURATION_MS = 20
# 16kHz * 16-bit (2 bytes) * 20ms = 640 bytes per chunk
CHUNK_SIZE_BYTES = int(SAMPLE_RATE * 2 * (CHUNK_DURATION_MS / 1000.0))


def generate_sine_wave(duration_ms=1000):
    """Generate a sine tone as 16 kHz mono PCM, signed 16-bit little-endian.

    Args:
        duration_ms: Length of audio to produce, in milliseconds.

    Returns:
        A bytearray of raw PCM sample data (2 bytes per sample).
    """
    sample_count = int(SAMPLE_RATE * (duration_ms / 1000.0))
    # Same per-sample expression as before; packed in a single struct call
    # ('<' little-endian, 'h' signed 16-bit) instead of one pack per sample.
    samples = (
        int(32767.0 * math.sin(2 * math.pi * FREQUENCY * x / SAMPLE_RATE))
        for x in range(sample_count)
    )
    return bytearray(struct.pack(f"<{sample_count}h", *samples))
|
||||
|
||||
|
||||
async def receive_loop(ws):
    """Consume and log every frame the server sends over *ws*.

    Text frames are reported as parsed JSON events when possible, binary
    frames are reported as audio, and the loop exits when the socket is
    closed or errors out.
    """
    print("👂 Listening for server responses...")
    async for msg in ws:
        timestamp = datetime.now().strftime("%H:%M:%S")
        kind = msg.type

        if kind == aiohttp.WSMsgType.TEXT:
            # Prefer a structured event log line; fall back to raw text.
            try:
                event_type = json.loads(msg.data).get('event', 'Unknown')
            except json.JSONDecodeError:
                print(f"[{timestamp}] 📨 Text: {msg.data[:100]}...")
            else:
                print(f"[{timestamp}] 📨 Event: {event_type} | {msg.data[:150]}...")

        elif kind == aiohttp.WSMsgType.BINARY:
            # Received audio chunk back (e.g., TTS or echo); overwrite the line.
            print(f"[{timestamp}] 🔊 Audio: {len(msg.data)} bytes", end="\r")

        elif kind == aiohttp.WSMsgType.CLOSED:
            print(f"\n[{timestamp}] ❌ Socket Closed")
            break

        elif kind == aiohttp.WSMsgType.ERROR:
            print(f"\n[{timestamp}] ⚠️ Socket Error")
            break
|
||||
|
||||
|
||||
async def send_file_loop(ws, file_path):
    """Stream a raw PCM (or WAV) file to the server in real-time-sized chunks."""
    if not os.path.exists(file_path):
        print(f"❌ Error: File '{file_path}' not found.")
        return

    print(f"📂 Streaming file: {file_path} ...")

    with open(file_path, "rb") as audio_file:
        if file_path.endswith('.wav'):
            # Skip the WAV header (first 44 bytes) so only PCM data is sent.
            # NOTE(review): assumes a standard 44-byte header — TODO confirm.
            audio_file.read(44)

        # Read and send fixed-size chunks until EOF.
        while chunk := audio_file.read(CHUNK_SIZE_BYTES):
            await ws.send_bytes(chunk)
            # Pace the stream to simulate real-time playback.
            await asyncio.sleep(CHUNK_DURATION_MS / 1000.0)

    print(f"\n✅ Finished streaming {file_path}")
|
||||
|
||||
|
||||
async def send_sine_loop(ws):
    """Stream a generated sine tone to the server in real-time-sized chunks."""
    print("🎙️ Starting Audio Stream (Sine Wave)...")

    # Pre-generate 5 seconds of tone, then slice it into 20 ms chunks.
    pcm = generate_sine_wave(5000)

    for offset in range(0, len(pcm), CHUNK_SIZE_BYTES):
        await ws.send_bytes(pcm[offset:offset + CHUNK_SIZE_BYTES])
        # Pace the stream to simulate real-time capture.
        await asyncio.sleep(CHUNK_DURATION_MS / 1000.0)

    print("\n✅ Finished streaming test audio.")
|
||||
|
||||
|
||||
async def run_client(url, file_path=None, use_sine=False):
    """Run the WebSocket test client.

    Connects to *url*, sends the initial ``invite`` command, then streams
    audio (a sine wave by default, or *file_path* when given) while logging
    server responses in parallel.

    Args:
        url: WebSocket endpoint URL to connect to.
        file_path: Optional path of a PCM/WAV file to stream.
        use_sine: Force sine-wave streaming even when *file_path* is set.
    """
    # `async with` guarantees the session is closed on every exit path —
    # previously done with a manual create + try/finally session.close().
    async with aiohttp.ClientSession() as session:
        try:
            print(f"🔌 Connecting to {url}...")
            async with session.ws_connect(url) as ws:
                print("✅ Connected!")

                # Send initial invite command to negotiate codec/sample rate.
                init_cmd = {
                    "command": "invite",
                    "option": {
                        "codec": "pcm",
                        "samplerate": SAMPLE_RATE
                    }
                }
                await ws.send_json(init_cmd)
                print("📤 Sent Invite Command")

                # Select sender: explicit --sine wins, then --file,
                # otherwise default to the sine wave.
                if use_sine or not file_path:
                    sender_task = send_sine_loop(ws)
                else:
                    sender_task = send_file_loop(ws, file_path)

                # Run send and receive loops in parallel.
                await asyncio.gather(
                    receive_loop(ws),
                    sender_task
                )

        except aiohttp.ClientConnectorError:
            print(f"❌ Connection Failed. Is the server running at {url}?")
        except Exception as e:
            # Top-level boundary of the test tool: report and exit cleanly.
            print(f"❌ Error: {e}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: pick the endpoint and audio source, then run.
    arg_parser = argparse.ArgumentParser(description="WebSocket Audio Test Client")
    arg_parser.add_argument("--url", default=SERVER_URL, help="WebSocket endpoint URL")
    arg_parser.add_argument("--file", help="Path to PCM/WAV file to stream")
    arg_parser.add_argument("--sine", action="store_true", help="Use sine wave generation (default)")
    cli_args = arg_parser.parse_args()

    try:
        asyncio.run(run_client(cli_args.url, cli_args.file, cli_args.sine))
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the client; exit quietly.
        print("\n👋 Client stopped.")
|
||||
Reference in New Issue
Block a user