# py-active-call/examples/simple_client.py

#!/usr/bin/env python3
"""
Simple WebSocket client for testing voice conversation.
Uses PyAudio when it is installed (more reliable audio playback on Windows)
and falls back to sounddevice otherwise.

Usage:
    python examples/simple_client.py
    python examples/simple_client.py --text "Hello"
"""

import argparse
import asyncio
import io
import json
import sys
import wave
from typing import Optional

# numpy is a hard requirement: print an install hint and exit if it is missing.
try:
    import numpy as np
except ImportError:
    print("pip install numpy")
    sys.exit(1)

# websockets is a hard requirement as well.
try:
    import websockets
except ImportError:
    print("pip install websockets")
    sys.exit(1)

# Try PyAudio first (more reliable on Windows)
try:
    import pyaudio
    PYAUDIO_AVAILABLE = True
except ImportError:
    PYAUDIO_AVAILABLE = False
    print("PyAudio not available, trying sounddevice...")

# sounddevice is probed unconditionally, even when PyAudio imported fine.
try:
    import sounddevice as sd
    SD_AVAILABLE = True
except ImportError:
    SD_AVAILABLE = False

# At least one audio backend is needed; otherwise print install hints and exit.
if not PYAUDIO_AVAILABLE and not SD_AVAILABLE:
    print("Please install pyaudio or sounddevice:")
    print("  pip install pyaudio")
    print("  or: pip install sounddevice")
    sys.exit(1)


class SimpleVoiceClient:
    """Text-driven WebSocket voice client that plays the server's audio.

    The client connects to ``url``, sends an ``invite`` command requesting
    PCM at ``sample_rate``, sends chat text, and plays every binary frame it
    receives as 16-bit mono PCM. Playback goes through PyAudio when it was
    imported successfully, otherwise through sounddevice.
    """

    def __init__(self, url: str, sample_rate: int = 16000):
        """Store connection settings and prepare the audio backend.

        Args:
            url: WebSocket URL of the server.
            sample_rate: Sample rate in Hz, used both in the invite and for
                playback.
        """
        self.url = url
        self.sample_rate = sample_rate
        self.ws = None
        self.running = False

        # Audio buffer (not used by any method of this class)
        self.audio_buffer = b""

        # Always define both attributes so close() never hits a missing
        # attribute, whichever backend is in use.
        self.pa = None
        self.stream = None
        if PYAUDIO_AVAILABLE:
            self.pa = pyaudio.PyAudio()

        # Stats
        self.bytes_received = 0

    async def connect(self):
        """Open the WebSocket and send the ``invite`` command."""
        print(f"Connecting to {self.url}...")
        self.ws = await websockets.connect(self.url)
        self.running = True
        print("Connected!")

        # Send invite
        await self.ws.send(json.dumps({
            "command": "invite",
            "option": {"codec": "pcm", "sampleRate": self.sample_rate}
        }))
        print("-> invite")

    async def send_chat(self, text: str):
        """Send ``text`` to the server as a ``chat`` command."""
        await self.ws.send(json.dumps({"command": "chat", "text": text}))
        print(f"-> chat: {text}")

    def play_audio(self, audio_data: bytes):
        """Play one chunk of 16-bit mono PCM, blocking until it is handed off.

        This call blocks, so ``receive_loop`` runs it in an executor thread.

        Args:
            audio_data: Raw PCM bytes; an empty chunk is ignored.
        """
        if not audio_data:
            return

        if PYAUDIO_AVAILABLE:
            # Use PyAudio - more reliable on Windows. The output stream is
            # opened lazily on the first chunk and reused afterwards.
            if self.stream is None:
                self.stream = self.pa.open(
                    format=pyaudio.paInt16,
                    channels=1,
                    rate=self.sample_rate,
                    output=True,
                    frames_per_buffer=1024
                )
            self.stream.write(audio_data)
        elif SD_AVAILABLE:
            # Use sounddevice. int16 spans [-32768, 32767], so dividing by
            # 32768 keeps every sample inside [-1.0, 1.0) without clipping.
            samples = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
            sd.play(samples, self.sample_rate, blocking=True)

    async def receive_loop(self):
        """Receive frames until hangup, connection close, or ``running`` clears.

        Binary frames are counted in ``bytes_received`` and played. Text
        frames are parsed as JSON events: ``transcript`` events are printed,
        ``hangup`` ends the loop, anything else is printed by event name.
        Text frames that are not a JSON object are reported and skipped so a
        single bad message cannot stop the receiver.
        """
        print("\nWaiting for response...")
        loop = asyncio.get_running_loop()

        while self.running:
            try:
                # Short timeout so a cleared ``running`` flag is noticed soon.
                msg = await asyncio.wait_for(self.ws.recv(), timeout=0.1)
            except asyncio.TimeoutError:
                continue
            except websockets.ConnectionClosed:
                print("Connection closed")
                self.running = False
                break

            if isinstance(msg, bytes):
                # Audio data: 2 bytes per sample, one channel.
                self.bytes_received += len(msg)
                duration_ms = len(msg) / (self.sample_rate * 2) * 1000
                print(f"<- audio: {len(msg)} bytes ({duration_ms:.0f}ms)")

                # Play in an executor so the event loop is not blocked.
                await loop.run_in_executor(None, self.play_audio, msg)
                continue

            # JSON event
            try:
                event = json.loads(msg)
            except json.JSONDecodeError:
                print(f"<- ignoring non-JSON message: {msg!r}")
                continue
            if not isinstance(event, dict):
                print(f"<- ignoring unexpected JSON payload: {event!r}")
                continue

            etype = event.get("event", "?")

            if etype == "transcript":
                # User speech transcription
                text = event.get("text", "")
                is_final = event.get("isFinal", False)
                if is_final:
                    print(f"<- You said: {text}")
                else:
                    print(f"<- [listening] {text}", end="\r")
            elif etype == "hangup":
                print(f"<- {etype}")
                self.running = False
                break
            else:
                print(f"<- {etype}")

    async def run(self, text: Optional[str] = None):
        """Connect, exchange messages, and always clean up afterwards.

        Args:
            text: When given, send it once and wait up to 30 seconds for the
                response, returning earlier if the receive loop ends. When
                omitted, read lines from stdin until ``quit``, EOF, or the
                session ends.
        """
        recv_task = None
        try:
            await self.connect()
            await asyncio.sleep(0.5)

            # Start receiver
            recv_task = asyncio.create_task(self.receive_loop())

            if text:
                await self.send_chat(text)
                # Wait for the response, but stop waiting as soon as the
                # receiver finishes (hangup, closed connection, or error).
                await asyncio.wait({recv_task}, timeout=30)
            else:
                # Interactive mode
                print("\nType a message and press Enter (or 'quit' to exit):")
                loop = asyncio.get_running_loop()
                while self.running:
                    try:
                        user_input = await loop.run_in_executor(
                            None, input, "> "
                        )
                    except EOFError:
                        break
                    if user_input.strip().lower() == 'quit':
                        break
                    if not self.running:
                        # The session ended while we were waiting for input;
                        # sending now would fail on the closed connection.
                        if user_input.strip():
                            print("Connection is no longer active; message not sent.")
                        break
                    if user_input.strip():
                        await self.send_chat(user_input)
        finally:
            self.running = False
            try:
                # Stop the receiver before the audio stream and socket are
                # torn down, on the error path as well as the normal one.
                if recv_task is not None:
                    recv_task.cancel()
                    try:
                        await recv_task
                    except asyncio.CancelledError:
                        pass
            finally:
                await self.close()

    async def close(self):
        """Release the audio backend and close the WebSocket.

        The WebSocket is closed even if audio teardown raises. Audio handles
        are cleared once released, so a second call does not release them
        again.
        """
        self.running = False

        try:
            if self.stream is not None:
                self.stream.stop_stream()
                self.stream.close()
                self.stream = None
            if self.pa is not None:
                self.pa.terminate()
                self.pa = None
        finally:
            if self.ws:
                await self.ws.close()

        print(f"\nTotal audio received: {self.bytes_received / 1024:.1f} KB")


def list_audio_devices():
    """Print the output-capable devices of every available audio backend.

    For each backend that was imported successfully, devices with at least
    one output channel are listed by index and name, and the backend's
    default output device is tagged ``[DEFAULT]``.
    """
    print("\n=== Audio Devices ===")

    if PYAUDIO_AVAILABLE:
        pa = pyaudio.PyAudio()
        try:
            print("\nPyAudio devices:")
            # Look the default up once. PyAudio raises when the system has no
            # default output device, and that should not abort the listing.
            try:
                default_index = pa.get_default_output_device_info()['index']
            except OSError:
                default_index = None
            for i in range(pa.get_device_count()):
                info = pa.get_device_info_by_index(i)
                if info['maxOutputChannels'] > 0:
                    default = " [DEFAULT]" if i == default_index else ""
                    print(f"  {i}: {info['name']}{default}")
        finally:
            # Release PortAudio even if a device query fails.
            pa.terminate()

    if SD_AVAILABLE:
        print("\nSounddevice devices:")
        # Index 1 of the default device pair is compared against the devices
        # that have output channels, as the original comparison did.
        default_output = sd.default.device[1]
        for i, d in enumerate(sd.query_devices()):
            if d['max_output_channels'] > 0:
                default = " [DEFAULT]" if i == default_output else ""
                print(f"  {i}: {d['name']}{default}")


async def main():
    """Parse command-line options, then list devices or run the client.

    With ``--list-devices`` the available output devices are printed and
    nothing else happens. Otherwise a ``SimpleVoiceClient`` is created from
    ``--url`` and ``--sample-rate`` and run with the optional ``--text``.
    """
    cli = argparse.ArgumentParser(description="Simple voice client")

    # One row per flag: the option string and its add_argument settings.
    flag_specs = (
        ("--url", {"default": "ws://localhost:8000/ws"}),
        ("--text", {"help": "Send text and play response"}),
        ("--list-devices", {"action": "store_true"}),
        ("--sample-rate", {"type": int, "default": 16000}),
    )
    for flag, settings in flag_specs:
        cli.add_argument(flag, **settings)

    options = cli.parse_args()

    if not options.list_devices:
        voice_client = SimpleVoiceClient(options.url, options.sample_rate)
        await voice_client.run(options.text)
    else:
        list_audio_devices()


if __name__ == "__main__":
    # Script entry point: drive the async main() to completion.
    asyncio.run(main())