Integrate eou and vad

2026-01-29 13:57:12 +08:00
parent 4cb267a288
commit cd90b4fb37
25 changed files with 2592 additions and 297 deletions
--- a/examples/mic_client.py
+++ b/examples/mic_client.py
@@ -0,0 +1,137 @@
+"""
+Microphone WebSocket Client
+
+Connects to the backend WebSocket endpoint and streams audio from the microphone.
+Used to test VAD and EOU detection.
+
+Dependencies:
+    pip install pyaudio aiohttp
+"""
+
+import asyncio
+import aiohttp
+import pyaudio
+import json
+import sys
+from datetime import datetime
+
+# Configuration: edit these to target a different server or capture format.
+SERVER_URL = "ws://localhost:8000/ws"  # WebSocket endpoint that main() connects to
+SAMPLE_RATE = 16000  # capture rate in Hz; also sent as "samplerate" in the invite
+CHANNELS = 1  # number of input channels opened on the microphone
+CHUNK_DURATION_MS = 20  # duration of each chunk read from the microphone
+CHUNK_SIZE = SAMPLE_RATE * CHUNK_DURATION_MS // 1000  # 320 samples for 20ms; integer math avoids float truncation
+FORMAT = pyaudio.paInt16  # PyAudio sample-format constant (16-bit integers)
+
+async def send_audio_loop(ws, stream):
+    """Read CHUNK_SIZE-frame chunks from `stream` and send each to `ws` as a binary frame.
+
+    Runs until an exception occurs (e.g. the socket closing); it is printed, not raised.
+    """
+    print("🎙️  Microphone streaming started...")
+    # get_running_loop() is the supported accessor inside a coroutine; fetch it once.
+    loop = asyncio.get_running_loop()
+    try:
+        while True:
+            # stream.read() blocks until a chunk is captured, so it runs in the default
+            # executor to keep the event loop free. The microphone itself paces the loop.
+            data = await loop.run_in_executor(
+                None, lambda: stream.read(CHUNK_SIZE, exception_on_overflow=False)
+            )
+            await ws.send_bytes(data)
+    except Exception as e:
+        print(f"❌ Error in send loop: {e}")
+
+async def receive_loop(ws):
+    """Print timestamped server messages until the connection closes or errors."""
+    print("👂 Listening for server events...")
+    async for msg in ws:
+        timestamp = datetime.now().strftime("%H:%M:%S.%f")[:-3]  # trim microseconds to ms
+        if msg.type == aiohttp.WSMsgType.TEXT:
+            try:
+                data = json.loads(msg.data)
+            except json.JSONDecodeError:
+                data = None
+            # Non-JSON text and JSON that is not an object (no .get()) are printed raw.
+            if not isinstance(data, dict):
+                print(f"[{timestamp}] 📄 Text: {msg.data}")
+                continue
+            # Highlight VAD/EOU events
+            event = data.get('event')
+            if event == 'speaking':
+                print(f"[{timestamp}] 🗣️  SPEAKING STARTED")
+            elif event == 'silence':
+                print(f"[{timestamp}] 🤫 SILENCE DETECTED")
+            elif event == 'eou':
+                print(f"[{timestamp}] ✅ END OF UTTERANCE (EOU)")
+            elif event == 'error':
+                print(f"[{timestamp}] ❌ ERROR: {data.get('error')}")
+            else:
+                print(f"[{timestamp}] 📩 {event}: {str(data)[:100]}")
+        elif msg.type == aiohttp.WSMsgType.CLOSED:
+            print("❌ Connection closed")
+            break
+        elif msg.type == aiohttp.WSMsgType.ERROR:
+            print("❌ Connection error")
+            break
+
+async def main():
+    """Open the microphone, connect to SERVER_URL, send the invite and stream audio.
+
+    Prints a message and returns early if no audio device is found or the microphone
+    cannot be opened. PyAudio, the stream and the HTTP session are always released.
+    """
+    p = pyaudio.PyAudio()
+    stream = None
+    session = None
+    try:
+        # Check for input devices (deviceCount is read from host API 0 only)
+        info = p.get_host_api_info_by_index(0)
+        if info.get('deviceCount') == 0:
+            print("❌ No audio input devices found")
+            return
+
+        # Open microphone stream
+        try:
+            stream = p.open(format=FORMAT,
+                            channels=CHANNELS,
+                            rate=SAMPLE_RATE,
+                            input=True,
+                            frames_per_buffer=CHUNK_SIZE)
+        except Exception as e:
+            print(f"❌ Failed to open microphone: {e}")
+            return
+
+        session = aiohttp.ClientSession()
+        print(f"🔌 Connecting to {SERVER_URL}...")
+        async with session.ws_connect(SERVER_URL) as ws:
+            print("✅ Connected!")
+
+            # 1. Send Invite
+            invite_msg = {
+                "command": "invite",
+                "option": {"codec": "pcm", "samplerate": SAMPLE_RATE},
+            }
+            await ws.send_json(invite_msg)
+            print("📤 Sent Invite")
+
+            # 2. Run loops
+            await asyncio.gather(receive_loop(ws), send_audio_loop(ws, stream))
+    except aiohttp.ClientConnectorError:
+        print(f"❌ Failed to connect to {SERVER_URL}. Is the server running?")
+    except KeyboardInterrupt:
+        print("\n👋 Stopping...")
+    finally:
+        # Runs on every exit path, including the early returns above.
+        if stream is not None:
+            stream.stop_stream()
+            stream.close()
+        p.terminate()
+        if session is not None:
+            await session.close()
+
+if __name__ == "__main__":  # script entry point; main() is not run on import
+    try:
+        asyncio.run(main())
+    except KeyboardInterrupt:
+        pass  # Ctrl-C ends the client quietly instead of printing a traceback