Integrate EOU and VAD

This commit is contained in:
Xin Wang
2026-01-29 13:57:12 +08:00
parent 4cb267a288
commit cd90b4fb37
25 changed files with 2592 additions and 297 deletions

137
examples/mic_client.py Normal file
View File

@@ -0,0 +1,137 @@
"""
Microphone WebSocket Client
Connects to the backend WebSocket endpoint and streams audio from the microphone.
Used to test VAD and EOU detection.
Dependencies:
pip install pyaudio aiohttp
"""
import asyncio
import aiohttp
import pyaudio
import json
import sys
from datetime import datetime
# --- Client configuration -------------------------------------------------
# WebSocket endpoint the client connects to.
SERVER_URL = "ws://localhost:8000/ws"
# Capture parameters passed to the microphone stream and the invite message.
SAMPLE_RATE = 16000
CHANNELS = 1
# Length of one audio chunk sent per WebSocket message, in milliseconds.
CHUNK_DURATION_MS = 20
# Samples per chunk (320 for 20 ms at 16 kHz); integer arithmetic avoids a
# float round-trip.
CHUNK_SIZE = SAMPLE_RATE * CHUNK_DURATION_MS // 1000
# Sample format handed to PyAudio when the input stream is opened in main().
FORMAT = pyaudio.paInt16
async def send_audio_loop(ws, stream):
    """Read from microphone and send to WebSocket.

    Args:
        ws: WebSocket connection; each chunk is sent with ``ws.send_bytes``.
        stream: Opened audio input stream; ``stream.read`` is called for
            ``CHUNK_SIZE`` samples at a time.

    The loop runs until reading or sending raises. The exception is printed
    and the coroutine returns normally instead of re-raising.
    """
    print("🎙️ Microphone streaming started...")
    # Look the loop up once; inside a coroutine the running loop is the one
    # we want, and it does not change between iterations.
    loop = asyncio.get_running_loop()

    def read_chunk():
        # Overflowed input buffers are tolerated instead of raising.
        return stream.read(CHUNK_SIZE, exception_on_overflow=False)

    try:
        while True:
            # stream.read blocks, so it runs in the default executor to keep
            # the event loop free for the receive side.
            data = await loop.run_in_executor(None, read_chunk)
            await ws.send_bytes(data)
            # No sleep needed here as microphone dictates the timing
    except Exception as e:
        print(f"❌ Error in send loop: {e}")
async def receive_loop(ws):
    """Listen for VAD/EOU events.

    Args:
        ws: WebSocket connection to iterate over for incoming messages.

    Text messages are parsed as JSON and printed with a millisecond
    timestamp; the ``speaking``, ``silence``, ``eou`` and ``error`` events get
    dedicated banners. Text that is not a JSON object is printed verbatim.
    Returns when the connection closes or an error message is received.
    """
    print("👂 Listening for server events...")
    async for msg in ws:
        timestamp = datetime.now().strftime("%H:%M:%S.%f")[:-3]
        if msg.type == aiohttp.WSMsgType.TEXT:
            try:
                data = json.loads(msg.data)
            except json.JSONDecodeError:
                print(f"[{timestamp}] 📄 Text: {msg.data}")
                continue
            if not isinstance(data, dict):
                # Valid JSON but not an object (e.g. a list or number):
                # there is no 'event' key to look up, so show it as text.
                print(f"[{timestamp}] 📄 Text: {msg.data}")
                continue
            event = data.get('event')
            # Highlight VAD/EOU events
            if event == 'speaking':
                print(f"[{timestamp}] 🗣️ SPEAKING STARTED")
            elif event == 'silence':
                print(f"[{timestamp}] 🤫 SILENCE DETECTED")
            elif event == 'eou':
                print(f"[{timestamp}] ✅ END OF UTTERANCE (EOU)")
            elif event == 'error':
                print(f"[{timestamp}] ❌ ERROR: {data.get('error')}")
            else:
                print(f"[{timestamp}] 📩 {event}: {str(data)[:100]}")
        elif msg.type == aiohttp.WSMsgType.CLOSED:
            # Kept for iterables that do yield CLOSED; aiohttp's own async
            # iteration stops on close frames instead (handled below).
            print("❌ Connection closed")
            break
        elif msg.type == aiohttp.WSMsgType.ERROR:
            print("❌ Connection error")
            break
    else:
        # Iteration ended without a break: the WebSocket was closed.
        print("❌ Connection closed")
async def main():
    """Open the microphone, connect to the server and stream audio.

    Steps:
        1. Create a PyAudio instance and check that host API 0 reports at
           least one device.
        2. Open an input stream using the module-level audio settings.
        3. Connect to ``SERVER_URL``, send the ``invite`` command, then run
           the receive and send loops concurrently until both finish.

    Failures to open the microphone or to connect are printed rather than
    raised. The stream (if opened), the HTTP session and the PyAudio
    instance are always released, including on the early-exit paths.
    """
    p = pyaudio.PyAudio()
    stream = None
    try:
        # Check for input devices
        info = p.get_host_api_info_by_index(0)
        if info.get('deviceCount') == 0:
            print("❌ No audio input devices found")
            return

        # Open microphone stream
        try:
            stream = p.open(format=FORMAT,
                            channels=CHANNELS,
                            rate=SAMPLE_RATE,
                            input=True,
                            frames_per_buffer=CHUNK_SIZE)
        except Exception as e:
            print(f"❌ Failed to open microphone: {e}")
            return

        # The context manager closes the session even if cleanup of the
        # audio stream later raises.
        async with aiohttp.ClientSession() as session:
            try:
                print(f"🔌 Connecting to {SERVER_URL}...")
                async with session.ws_connect(SERVER_URL) as ws:
                    print("✅ Connected!")

                    # 1. Send Invite
                    invite_msg = {
                        "command": "invite",
                        "option": {
                            "codec": "pcm",
                            "samplerate": SAMPLE_RATE,
                        },
                    }
                    await ws.send_json(invite_msg)
                    print("📤 Sent Invite")

                    # 2. Run loops
                    await asyncio.gather(
                        receive_loop(ws),
                        send_audio_loop(ws, stream),
                    )
            except aiohttp.ClientConnectorError:
                print(f"❌ Failed to connect to {SERVER_URL}. Is the server running?")
            except KeyboardInterrupt:
                print("\n👋 Stopping...")
    finally:
        # Release audio resources on every exit path, including the early
        # returns above where no stream was opened.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()
if __name__ == "__main__":
    # Script entry point: drive the async client until it finishes.
    client = main()
    try:
        asyncio.run(client)
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the client; exit without a traceback.
        pass