250 lines
7.7 KiB
Python
250 lines
7.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Simple WebSocket client for testing voice conversation.

Plays received audio with PyAudio (more reliable on Windows) and falls back
to sounddevice when PyAudio is not installed.
|
|
|
|
Usage:
|
|
python examples/simple_client.py
|
|
python examples/simple_client.py --text "Hello"
|
|
"""
|
|
|
|
import argparse
|
|
import asyncio
|
|
import json
|
|
import sys
|
|
import wave
|
|
import io
|
|
|
|
# --- Hard requirements: print an install hint and exit when missing ---------
try:
    import numpy as np
except ImportError:
    print("pip install numpy")
    sys.exit(1)

try:
    import websockets
except ImportError:
    print("pip install websockets")
    sys.exit(1)

# --- Audio backends: PyAudio is preferred (more reliable on Windows) --------
try:
    import pyaudio
except ImportError:
    PYAUDIO_AVAILABLE = False
    print("PyAudio not available, trying sounddevice...")
else:
    PYAUDIO_AVAILABLE = True

try:
    import sounddevice as sd
except ImportError:
    SD_AVAILABLE = False
else:
    SD_AVAILABLE = True

# At least one playback backend is required.
if not (PYAUDIO_AVAILABLE or SD_AVAILABLE):
    print("Please install pyaudio or sounddevice:")
    print(" pip install pyaudio")
    print(" or: pip install sounddevice")
    sys.exit(1)
|
|
|
|
|
|
class SimpleVoiceClient:
    """Simple voice client with reliable audio playback.

    Connects to a WebSocket server, sends an ``invite`` followed by ``chat``
    commands, prints the JSON events the server sends back and plays binary
    frames as 16-bit mono PCM at ``sample_rate``.
    """

    def __init__(self, url: str, sample_rate: int = 16000):
        """Store connection settings and set up the PyAudio backend if present.

        Args:
            url: WebSocket URL of the server.
            sample_rate: Sample rate (Hz) sent in the invite and used for playback.
        """
        self.url = url
        self.sample_rate = sample_rate
        self.ws = None
        self.running = False

        # Audio buffer
        self.audio_buffer = b""

        # PyAudio setup. Both attributes always exist so close() can test them
        # regardless of which backend is in use.
        self.pa = None
        self.stream = None
        if PYAUDIO_AVAILABLE:
            self.pa = pyaudio.PyAudio()

        # Stats
        self.bytes_received = 0

    async def connect(self):
        """Connect to the server and send the initial ``invite`` command."""
        print(f"Connecting to {self.url}...")
        self.ws = await websockets.connect(self.url)
        self.running = True
        print("Connected!")

        # Send invite
        await self.ws.send(json.dumps({
            "command": "invite",
            "option": {"codec": "pcm", "sampleRate": self.sample_rate}
        }))
        print("-> invite")

    async def send_chat(self, text: str):
        """Send a ``chat`` command carrying ``text``."""
        await self.ws.send(json.dumps({"command": "chat", "text": text}))
        print(f"-> chat: {text}")

    def play_audio(self, audio_data: bytes):
        """Play a chunk of 16-bit mono PCM audio.

        Synchronous; ``receive_loop`` runs it in an executor so the event loop
        is not blocked while audio is written. Empty chunks are ignored.
        """
        if not audio_data:
            return

        if PYAUDIO_AVAILABLE:
            # Use PyAudio - more reliable on Windows.
            # The output stream is opened lazily on the first chunk.
            if self.stream is None:
                self.stream = self.pa.open(
                    format=pyaudio.paInt16,
                    channels=1,
                    rate=self.sample_rate,
                    output=True,
                    frames_per_buffer=1024
                )
            self.stream.write(audio_data)
        elif SD_AVAILABLE:
            # Use sounddevice, which takes float samples
            samples = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32767.0
            sd.play(samples, self.sample_rate, blocking=True)

    def _handle_event(self, msg) -> bool:
        """Print one text frame; return False once the server has hung up.

        Frames that are not a JSON object are reported and skipped instead of
        raising, so one malformed message cannot kill the receive task.
        """
        try:
            event = json.loads(msg)
        except json.JSONDecodeError:
            event = None
        if not isinstance(event, dict):
            print(f"<- ignored malformed message: {msg!r}")
            return True

        etype = event.get("event", "?")
        if etype == "transcript":
            # User speech transcription
            text = event.get("text", "")
            if event.get("isFinal", False):
                print(f"<- You said: {text}")
            else:
                # Partial result: overwrite the same console line
                print(f"<- [listening] {text}", end="\r")
        elif etype == "hangup":
            print(f"<- {etype}")
            self.running = False
            return False
        else:
            print(f"<- {etype}")
        return True

    async def receive_loop(self):
        """Receive frames until hangup, disconnect or ``running`` is cleared.

        Binary frames are counted and played; text frames are treated as JSON
        events and printed.
        """
        print("\nWaiting for response...")
        loop = asyncio.get_running_loop()

        while self.running:
            try:
                # Short timeout so a cleared ``running`` flag is noticed promptly
                msg = await asyncio.wait_for(self.ws.recv(), timeout=0.1)

                if isinstance(msg, bytes):
                    # Audio data
                    self.bytes_received += len(msg)
                    # 2 bytes per sample (16-bit mono)
                    duration_ms = len(msg) / (self.sample_rate * 2) * 1000
                    print(f"<- audio: {len(msg)} bytes ({duration_ms:.0f}ms)")

                    # Play immediately in executor to not block
                    await loop.run_in_executor(None, self.play_audio, msg)
                elif not self._handle_event(msg):
                    break

            except asyncio.TimeoutError:
                continue
            except websockets.ConnectionClosed:
                print("Connection closed")
                self.running = False
                break

    async def _interactive_loop(self):
        """Read lines from stdin and send each non-blank one as a chat message."""
        loop = asyncio.get_running_loop()
        print("\nType a message and press Enter (or 'quit' to exit):")
        while self.running:
            try:
                # input() blocks, so it runs in the default executor
                user_input = await loop.run_in_executor(None, input, "> ")
            except EOFError:
                break
            if user_input.lower() == 'quit':
                break
            if user_input.strip():
                await self.send_chat(user_input)

    async def run(self, text: str = None):
        """Run the client.

        Args:
            text: If given, send it once and listen for up to 30 seconds
                (returning earlier if the receive loop ends, e.g. on hangup or
                disconnect). Otherwise start interactive mode.
        """
        try:
            await self.connect()
            await asyncio.sleep(0.5)

            # Start receiver
            recv_task = asyncio.create_task(self.receive_loop())
            try:
                if text:
                    await self.send_chat(text)
                    # Wait for the response, but do not sit out the full
                    # timeout once the receiver has already finished.
                    await asyncio.wait({recv_task}, timeout=30)
                else:
                    await self._interactive_loop()
            finally:
                # Always stop the receiver, even if sending failed
                self.running = False
                recv_task.cancel()
                try:
                    await recv_task
                except asyncio.CancelledError:
                    pass
        finally:
            await self.close()

    async def close(self):
        """Release the audio backend, close the WebSocket and print stats."""
        self.running = False

        try:
            if self.stream is not None:
                self.stream.stop_stream()
                self.stream.close()
                self.stream = None
            if self.pa is not None:
                self.pa.terminate()
                self.pa = None
        finally:
            # Close the socket even if audio teardown failed
            if self.ws is not None:
                await self.ws.close()

            print(f"\nTotal audio received: {self.bytes_received / 1024:.1f} KB")
|
|
|
|
|
|
def list_audio_devices():
    """Print the output-capable audio devices of every available backend.

    The backend's default output device is tagged with ``[DEFAULT]``.
    """
    print("\n=== Audio Devices ===")

    if PYAUDIO_AVAILABLE:
        pa = pyaudio.PyAudio()
        try:
            print("\nPyAudio devices:")
            # Look the default up once; PyAudio raises IOError (an alias of
            # OSError) when the system has no default output device.
            try:
                default_index = pa.get_default_output_device_info()['index']
            except OSError:
                default_index = None

            for i in range(pa.get_device_count()):
                info = pa.get_device_info_by_index(i)
                if info['maxOutputChannels'] > 0:
                    default = " [DEFAULT]" if i == default_index else ""
                    print(f" {i}: {info['name']}{default}")
        finally:
            # Release PortAudio even if a device query failed
            pa.terminate()

    if SD_AVAILABLE:
        print("\nSounddevice devices:")
        # sd.default.device is an (input, output) pair; index 1 is the output
        default_output = sd.default.device[1]
        for i, d in enumerate(sd.query_devices()):
            if d['max_output_channels'] > 0:
                default = " [DEFAULT]" if i == default_output else ""
                print(f" {i}: {d['name']}{default}")
|
|
|
|
|
|
async def main():
    """Parse the command line, then list devices or run the voice client."""
    cli = argparse.ArgumentParser(description="Simple voice client")
    cli.add_argument("--url", default="ws://localhost:8000/ws")
    cli.add_argument("--text", help="Send text and play response")
    cli.add_argument("--list-devices", action="store_true")
    cli.add_argument("--sample-rate", type=int, default=16000)
    opts = cli.parse_args()

    if not opts.list_devices:
        # Normal operation: one-shot when --text is given, interactive otherwise
        voice_client = SimpleVoiceClient(opts.url, opts.sample_rate)
        await voice_client.run(opts.text)
        return

    # --list-devices only prints the devices and exits
    list_audio_devices()
|
|
|
|
|
|
if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl+C is the usual way to leave interactive mode; exit quietly
        # with the conventional SIGINT status instead of a traceback.
        print("\nInterrupted")
        sys.exit(130)
|