Fix bug occurring during long runs
This commit is contained in:
@@ -17,6 +17,7 @@ import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
import threading
|
||||
import queue
|
||||
from pathlib import Path
|
||||
@@ -92,6 +93,14 @@ class MicrophoneClient:
|
||||
# State
|
||||
self.is_recording = True
|
||||
self.is_playing = True
|
||||
|
||||
# TTFB tracking (Time to First Byte)
|
||||
self.request_start_time = None
|
||||
self.first_audio_received = False
|
||||
|
||||
# Interrupt handling - discard audio until next trackStart
|
||||
self._discard_audio = False
|
||||
self._audio_sequence = 0 # Track audio sequence to detect stale chunks
|
||||
|
||||
async def connect(self) -> None:
|
||||
"""Connect to WebSocket server."""
|
||||
@@ -117,6 +126,10 @@ class MicrophoneClient:
|
||||
|
||||
async def send_chat(self, text: str) -> None:
|
||||
"""Send chat message (text input)."""
|
||||
# Reset TTFB tracking for new request
|
||||
self.request_start_time = time.time()
|
||||
self.first_audio_received = False
|
||||
|
||||
await self.send_command({
|
||||
"command": "chat",
|
||||
"text": text
|
||||
@@ -236,9 +249,21 @@ class MicrophoneClient:
|
||||
# Audio data received
|
||||
self.bytes_received += len(message)
|
||||
|
||||
# Check if we should discard this audio (after interrupt)
|
||||
if self._discard_audio:
|
||||
duration_ms = len(message) / (self.sample_rate * 2) * 1000
|
||||
print(f"← Audio: {duration_ms:.0f}ms (DISCARDED - waiting for new track)")
|
||||
continue
|
||||
|
||||
if self.is_playing:
|
||||
self._add_audio_to_buffer(message)
|
||||
|
||||
# Calculate and display TTFB for first audio packet
|
||||
if not self.first_audio_received and self.request_start_time:
|
||||
client_ttfb_ms = (time.time() - self.request_start_time) * 1000
|
||||
self.first_audio_received = True
|
||||
print(f"← [TTFB] Client first audio latency: {client_ttfb_ms:.0f}ms")
|
||||
|
||||
# Show progress (less verbose)
|
||||
with self.audio_output_lock:
|
||||
buffer_ms = len(self.audio_output_buffer) / (self.sample_rate * 2) * 1000
|
||||
@@ -285,20 +310,36 @@ class MicrophoneClient:
|
||||
# Interim result - show with indicator (overwrite same line)
|
||||
display_text = text[:60] + "..." if len(text) > 60 else text
|
||||
print(f" [listening] {display_text}".ljust(80), end="\r")
|
||||
elif event_type == "ttfb":
|
||||
# Server-side TTFB event
|
||||
latency_ms = event.get("latencyMs", 0)
|
||||
print(f"← [TTFB] Server reported latency: {latency_ms}ms")
|
||||
elif event_type == "trackStart":
|
||||
print("← Bot started speaking")
|
||||
# IMPORTANT: Accept audio again after trackStart
|
||||
self._discard_audio = False
|
||||
self._audio_sequence += 1
|
||||
# Reset TTFB tracking for voice responses (when no chat was sent)
|
||||
if self.request_start_time is None:
|
||||
self.request_start_time = time.time()
|
||||
self.first_audio_received = False
|
||||
# Clear any old audio in buffer
|
||||
with self.audio_output_lock:
|
||||
self.audio_output_buffer = b""
|
||||
elif event_type == "trackEnd":
|
||||
print("← Bot finished speaking")
|
||||
# Reset TTFB tracking after response completes
|
||||
self.request_start_time = None
|
||||
self.first_audio_received = False
|
||||
elif event_type == "interrupt":
|
||||
print("← Bot interrupted!")
|
||||
# IMPORTANT: Clear audio buffer immediately on interrupt
|
||||
# IMPORTANT: Discard all audio until next trackStart
|
||||
self._discard_audio = True
|
||||
# Clear audio buffer immediately
|
||||
with self.audio_output_lock:
|
||||
buffer_ms = len(self.audio_output_buffer) / (self.sample_rate * 2) * 1000
|
||||
self.audio_output_buffer = b""
|
||||
print(f" (cleared {buffer_ms:.0f}ms of buffered audio)")
|
||||
print(f" (cleared {buffer_ms:.0f}ms, discarding audio until new track)")
|
||||
elif event_type == "error":
|
||||
print(f"← Error: {event.get('error')}")
|
||||
elif event_type == "hangup":
|
||||
|
||||
@@ -12,6 +12,7 @@ import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
import wave
|
||||
import io
|
||||
|
||||
@@ -67,6 +68,13 @@ class SimpleVoiceClient:
|
||||
|
||||
# Stats
|
||||
self.bytes_received = 0
|
||||
|
||||
# TTFB tracking (Time to First Byte)
|
||||
self.request_start_time = None
|
||||
self.first_audio_received = False
|
||||
|
||||
# Interrupt handling - discard audio until next trackStart
|
||||
self._discard_audio = False
|
||||
|
||||
async def connect(self):
|
||||
"""Connect to server."""
|
||||
@@ -84,6 +92,10 @@ class SimpleVoiceClient:
|
||||
|
||||
async def send_chat(self, text: str):
|
||||
"""Send chat message."""
|
||||
# Reset TTFB tracking for new request
|
||||
self.request_start_time = time.time()
|
||||
self.first_audio_received = False
|
||||
|
||||
await self.ws.send(json.dumps({"command": "chat", "text": text}))
|
||||
print(f"-> chat: {text}")
|
||||
|
||||
@@ -120,6 +132,18 @@ class SimpleVoiceClient:
|
||||
# Audio data
|
||||
self.bytes_received += len(msg)
|
||||
duration_ms = len(msg) / (self.sample_rate * 2) * 1000
|
||||
|
||||
# Check if we should discard this audio (after interrupt)
|
||||
if self._discard_audio:
|
||||
print(f"<- audio: {len(msg)} bytes ({duration_ms:.0f}ms) [DISCARDED]")
|
||||
continue
|
||||
|
||||
# Calculate and display TTFB for first audio packet
|
||||
if not self.first_audio_received and self.request_start_time:
|
||||
client_ttfb_ms = (time.time() - self.request_start_time) * 1000
|
||||
self.first_audio_received = True
|
||||
print(f"<- [TTFB] Client first audio latency: {client_ttfb_ms:.0f}ms")
|
||||
|
||||
print(f"<- audio: {len(msg)} bytes ({duration_ms:.0f}ms)")
|
||||
|
||||
# Play immediately in executor to not block
|
||||
@@ -138,6 +162,18 @@ class SimpleVoiceClient:
|
||||
print(f"<- You said: {text}")
|
||||
else:
|
||||
print(f"<- [listening] {text}", end="\r")
|
||||
elif etype == "ttfb":
|
||||
# Server-side TTFB event
|
||||
latency_ms = event.get("latencyMs", 0)
|
||||
print(f"<- [TTFB] Server reported latency: {latency_ms}ms")
|
||||
elif etype == "trackStart":
|
||||
# New track starting - accept audio again
|
||||
self._discard_audio = False
|
||||
print(f"<- {etype}")
|
||||
elif etype == "interrupt":
|
||||
# Interrupt - discard audio until next trackStart
|
||||
self._discard_audio = True
|
||||
print(f"<- {etype} (discarding audio until new track)")
|
||||
elif etype == "hangup":
|
||||
print(f"<- {etype}")
|
||||
self.running = False
|
||||
|
||||
166
examples/test_websocket.py
Normal file
166
examples/test_websocket.py
Normal file
@@ -0,0 +1,166 @@
|
||||
"""WebSocket endpoint test client.
|
||||
|
||||
Tests the /ws endpoint with sine wave or file audio streaming.
|
||||
Based on reference/py-active-call/exec/test_ws_endpoint/test_ws.py
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import json
|
||||
import struct
|
||||
import math
|
||||
import argparse
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
# Configuration
SERVER_URL = "ws://localhost:8000/ws"
SAMPLE_RATE = 16000
FREQUENCY = 440  # 440Hz Sine Wave
CHUNK_DURATION_MS = 20
# 16kHz * 16-bit (2 bytes) * 20ms = 640 bytes per chunk
CHUNK_SIZE_BYTES = int(SAMPLE_RATE * 2 * (CHUNK_DURATION_MS / 1000.0))


def generate_sine_wave(duration_ms=1000):
    """Generate a sine tone as 16 kHz mono PCM, signed 16-bit little-endian.

    Args:
        duration_ms: Length of audio to produce, in milliseconds.

    Returns:
        A bytearray of raw PCM sample data (2 bytes per sample).
    """
    sample_count = int(SAMPLE_RATE * (duration_ms / 1000.0))
    # Same per-sample expression as before; packed in a single struct call
    # ('<' little-endian, 'h' signed 16-bit) instead of one pack per sample.
    samples = (
        int(32767.0 * math.sin(2 * math.pi * FREQUENCY * x / SAMPLE_RATE))
        for x in range(sample_count)
    )
    return bytearray(struct.pack(f"<{sample_count}h", *samples))
|
||||
|
||||
|
||||
async def receive_loop(ws):
    """Consume and log every frame the server sends over *ws*.

    Text frames are reported as parsed JSON events when possible, binary
    frames are reported as audio, and the loop exits when the socket is
    closed or errors out.
    """
    print("👂 Listening for server responses...")
    async for msg in ws:
        timestamp = datetime.now().strftime("%H:%M:%S")
        kind = msg.type

        if kind == aiohttp.WSMsgType.TEXT:
            # Prefer a structured event log line; fall back to raw text.
            try:
                event_type = json.loads(msg.data).get('event', 'Unknown')
            except json.JSONDecodeError:
                print(f"[{timestamp}] 📨 Text: {msg.data[:100]}...")
            else:
                print(f"[{timestamp}] 📨 Event: {event_type} | {msg.data[:150]}...")

        elif kind == aiohttp.WSMsgType.BINARY:
            # Received audio chunk back (e.g., TTS or echo); overwrite the line.
            print(f"[{timestamp}] 🔊 Audio: {len(msg.data)} bytes", end="\r")

        elif kind == aiohttp.WSMsgType.CLOSED:
            print(f"\n[{timestamp}] ❌ Socket Closed")
            break

        elif kind == aiohttp.WSMsgType.ERROR:
            print(f"\n[{timestamp}] ⚠️ Socket Error")
            break
|
||||
|
||||
|
||||
async def send_file_loop(ws, file_path):
    """Stream a raw PCM (or WAV) file to the server in real-time-sized chunks."""
    if not os.path.exists(file_path):
        print(f"❌ Error: File '{file_path}' not found.")
        return

    print(f"📂 Streaming file: {file_path} ...")

    with open(file_path, "rb") as audio_file:
        if file_path.endswith('.wav'):
            # Skip the WAV header (first 44 bytes) so only PCM data is sent.
            # NOTE(review): assumes a standard 44-byte header — TODO confirm.
            audio_file.read(44)

        # Read and send fixed-size chunks until EOF.
        while chunk := audio_file.read(CHUNK_SIZE_BYTES):
            await ws.send_bytes(chunk)
            # Pace the stream to simulate real-time playback.
            await asyncio.sleep(CHUNK_DURATION_MS / 1000.0)

    print(f"\n✅ Finished streaming {file_path}")
|
||||
|
||||
|
||||
async def send_sine_loop(ws):
    """Stream a generated sine tone to the server in real-time-sized chunks."""
    print("🎙️ Starting Audio Stream (Sine Wave)...")

    # Pre-generate 5 seconds of tone, then slice it into 20 ms chunks.
    pcm = generate_sine_wave(5000)

    for offset in range(0, len(pcm), CHUNK_SIZE_BYTES):
        await ws.send_bytes(pcm[offset:offset + CHUNK_SIZE_BYTES])
        # Pace the stream to simulate real-time capture.
        await asyncio.sleep(CHUNK_DURATION_MS / 1000.0)

    print("\n✅ Finished streaming test audio.")
|
||||
|
||||
|
||||
async def run_client(url, file_path=None, use_sine=False):
    """Run the WebSocket test client.

    Connects to *url*, sends the initial ``invite`` command, then streams
    audio (a sine wave by default, or *file_path* when given) while logging
    server responses in parallel.

    Args:
        url: WebSocket endpoint URL to connect to.
        file_path: Optional path of a PCM/WAV file to stream.
        use_sine: Force sine-wave streaming even when *file_path* is set.
    """
    # `async with` guarantees the session is closed on every exit path —
    # previously done with a manual create + try/finally session.close().
    async with aiohttp.ClientSession() as session:
        try:
            print(f"🔌 Connecting to {url}...")
            async with session.ws_connect(url) as ws:
                print("✅ Connected!")

                # Send initial invite command to negotiate codec/sample rate.
                init_cmd = {
                    "command": "invite",
                    "option": {
                        "codec": "pcm",
                        "samplerate": SAMPLE_RATE
                    }
                }
                await ws.send_json(init_cmd)
                print("📤 Sent Invite Command")

                # Select sender: explicit --sine wins, then --file,
                # otherwise default to the sine wave.
                if use_sine or not file_path:
                    sender_task = send_sine_loop(ws)
                else:
                    sender_task = send_file_loop(ws, file_path)

                # Run send and receive loops in parallel.
                await asyncio.gather(
                    receive_loop(ws),
                    sender_task
                )

        except aiohttp.ClientConnectorError:
            print(f"❌ Connection Failed. Is the server running at {url}?")
        except Exception as e:
            # Top-level boundary of the test tool: report and exit cleanly.
            print(f"❌ Error: {e}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: pick the endpoint and audio source, then run.
    arg_parser = argparse.ArgumentParser(description="WebSocket Audio Test Client")
    arg_parser.add_argument("--url", default=SERVER_URL, help="WebSocket endpoint URL")
    arg_parser.add_argument("--file", help="Path to PCM/WAV file to stream")
    arg_parser.add_argument("--sine", action="store_true", help="Use sine wave generation (default)")
    cli_args = arg_parser.parse_args()

    try:
        asyncio.run(run_client(cli_args.url, cli_args.file, cli_args.sine))
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the client; exit quietly.
        print("\n👋 Client stopped.")
|
||||
Reference in New Issue
Block a user