I can use text to get audio response and barge in

2026-01-29 16:25:53 +08:00
parent cd90b4fb37
commit ac0c76e6e8
16 changed files with 3394 additions and 119 deletions
--- a/examples/simple_client.py
+++ b/examples/simple_client.py
@@ -0,0 +1,239 @@
+#!/usr/bin/env python3
+"""
+Simple WebSocket client for testing voice conversation.
+Uses PyAudio for more reliable audio playback on Windows.
+
+Usage:
+    python examples/simple_client.py
+    python examples/simple_client.py --text "Hello"
+"""
+
+import argparse
+import asyncio
+import json
+import sys
+import wave
+import io
+
+try:
+    import numpy as np
+except ImportError:
+    print("pip install numpy")
+    sys.exit(1)
+
+try:
+    import websockets
+except ImportError:
+    print("pip install websockets")
+    sys.exit(1)
+
+# Try PyAudio first (more reliable on Windows)
+try:
+    import pyaudio
+    PYAUDIO_AVAILABLE = True
+except ImportError:
+    PYAUDIO_AVAILABLE = False
+    print("PyAudio not available, trying sounddevice...")
+
+try:
+    import sounddevice as sd
+    SD_AVAILABLE = True
+except ImportError:
+    SD_AVAILABLE = False
+
+if not PYAUDIO_AVAILABLE and not SD_AVAILABLE:
+    print("Please install pyaudio or sounddevice:")
+    print("  pip install pyaudio")
+    print("  or: pip install sounddevice")
+    sys.exit(1)
+
+
+class SimpleVoiceClient:
+    """Text-driven test client for the voice WebSocket endpoint.
+
+    Sends an ``invite`` and then ``chat`` commands, and plays every binary
+    frame it receives as 16-bit mono PCM through PyAudio when it is
+    installed, otherwise through sounddevice.
+    """
+
+    def __init__(self, url: str, sample_rate: int = 16000):
+        """Store the settings; the socket itself is opened by connect().
+
+        Args:
+            url: WebSocket URL of the server.
+            sample_rate: Rate in Hz sent in the invite and used for playback.
+        """
+        self.url = url
+        self.sample_rate = sample_rate
+        self.ws = None
+        self.running = False
+        self.audio_buffer = b""  # kept for compatibility; no method reads it
+        self.bytes_received = 0  # total audio bytes, reported by close()
+        # Both are always defined so close() works with either backend.
+        self.pa = pyaudio.PyAudio() if PYAUDIO_AVAILABLE else None
+        self.stream = None  # PyAudio output stream, opened on first playback
+
+    async def connect(self):
+        """Open the WebSocket and send the ``invite`` command."""
+        print(f"Connecting to {self.url}...")
+        self.ws = await websockets.connect(self.url)
+        self.running = True
+        print("Connected!")
+        option = {"codec": "pcm", "sampleRate": self.sample_rate}
+        await self.ws.send(json.dumps({"command": "invite", "option": option}))
+        print("-> invite")
+
+    async def send_chat(self, text: str):
+        """Send ``text`` to the server as a ``chat`` command."""
+        await self.ws.send(json.dumps({"command": "chat", "text": text}))
+        print(f"-> chat: {text}")
+
+    def play_audio(self, audio_data: bytes):
+        """Play one chunk of 16-bit mono PCM, blocking until it is written."""
+        # Drop a trailing odd byte; np.frombuffer rejects a partial sample.
+        audio_data = audio_data[:len(audio_data) - len(audio_data) % 2]
+        if not audio_data:
+            return
+        if PYAUDIO_AVAILABLE:
+            if self.stream is None:
+                self.stream = self.pa.open(
+                    format=pyaudio.paInt16, channels=1, rate=self.sample_rate,
+                    output=True, frames_per_buffer=1024,
+                )
+            self.stream.write(audio_data)
+        elif SD_AVAILABLE:
+            # Divide by 32768 so the int16 range maps into [-1.0, 1.0).
+            samples = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
+            sd.play(samples, self.sample_rate, blocking=True)
+
+    async def receive_loop(self):
+        """Play binary frames and print JSON events until the session ends."""
+        print("\nWaiting for response...")
+        loop = asyncio.get_running_loop()
+        while self.running:
+            try:
+                # Short timeout so a cleared ``running`` flag is noticed.
+                msg = await asyncio.wait_for(self.ws.recv(), timeout=0.1)
+                if isinstance(msg, bytes):
+                    self.bytes_received += len(msg)
+                    duration_ms = len(msg) / (self.sample_rate * 2) * 1000
+                    print(f"<- audio: {len(msg)} bytes ({duration_ms:.0f}ms)")
+                    # Playback blocks, so keep it off the event-loop thread.
+                    await loop.run_in_executor(None, self.play_audio, msg)
+                    continue
+                event = json.loads(msg)
+                etype = event.get("event", "?") if isinstance(event, dict) else "?"
+                print(f"<- {etype}")
+                if etype == "hangup":
+                    self.running = False
+                    break
+            except asyncio.TimeoutError:
+                continue
+            except json.JSONDecodeError:
+                # One malformed text frame must not end the whole session.
+                print("<- (ignored non-JSON message)")
+            except websockets.ConnectionClosed:
+                print("Connection closed")
+                self.running = False
+                break
+
+    async def run(self, text: str = None):
+        """Connect, then send ``text`` once or read messages interactively.
+
+        Args:
+            text: Message to send. When given, the client waits up to 30 s for
+                the session to end; when omitted, stdin is read until 'quit'.
+        """
+        recv_task = None
+        try:
+            await self.connect()
+            await asyncio.sleep(0.5)
+            recv_task = asyncio.create_task(self.receive_loop())
+            if text:
+                await self.send_chat(text)
+                # Return as soon as the receiver finishes (hangup/disconnect)
+                # instead of always sleeping for the full 30 seconds.
+                await asyncio.wait({recv_task}, timeout=30)
+            else:
+                print("\nType a message and press Enter (or 'quit' to exit):")
+                loop = asyncio.get_running_loop()
+                while self.running:
+                    try:
+                        # input() blocks, so read stdin on a worker thread.
+                        user_input = await loop.run_in_executor(None, input, "> ")
+                    except EOFError:
+                        break
+                    if user_input.lower() == 'quit':
+                        break
+                    # The connection may have dropped while waiting for input.
+                    if user_input.strip() and self.running:
+                        await self.send_chat(user_input)
+        finally:
+            # Also reached on errors, so the receiver is never left pending.
+            self.running = False
+            try:
+                if recv_task is not None:
+                    recv_task.cancel()
+                    try:
+                        await recv_task
+                    except asyncio.CancelledError:
+                        pass
+            finally:
+                await self.close()
+
+    async def close(self):
+        """Release the audio backend and the socket; safe to call twice."""
+        self.running = False
+        stream, self.stream = self.stream, None
+        pa, self.pa = self.pa, None
+        if stream is not None:
+            stream.stop_stream()
+            stream.close()
+        if pa is not None:
+            pa.terminate()
+        if self.ws:
+            await self.ws.close()
+        print(f"\nTotal audio received: {self.bytes_received / 1024:.1f} KB")
+
+
+def list_audio_devices():
+    """Print the output-capable devices of each installed audio backend."""
+    print("\n=== Audio Devices ===")
+    if PYAUDIO_AVAILABLE:
+        pa = pyaudio.PyAudio()
+        try:  # look the default up once; PyAudio raises IOError if there is none
+            default_index = pa.get_default_output_device_info()['index']
+        except IOError:
+            default_index = None
+        print("\nPyAudio devices:")
+        for i in range(pa.get_device_count()):
+            info = pa.get_device_info_by_index(i)
+            if info['maxOutputChannels'] > 0:
+                print(f"  {i}: {info['name']}{' [DEFAULT]' if i == default_index else ''}")
+        pa.terminate()
+    if SD_AVAILABLE:
+        print("\nSounddevice devices:")
+        for i, d in enumerate(sd.query_devices()):
+            if d['max_output_channels'] > 0:
+                print(f"  {i}: {d['name']}{' [DEFAULT]' if i == sd.default.device[1] else ''}")
+
+
+async def main():
+    """Parse the command line, then list devices or run a client session."""
+    cli = argparse.ArgumentParser(description="Simple voice client")
+    for flag, options in (
+        ("--url", {"default": "ws://localhost:8000/ws"}),
+        ("--text", {"help": "Send text and play response"}),
+        ("--list-devices", {"action": "store_true"}),
+        ("--sample-rate", {"type": int, "default": 16000}),
+    ):
+        cli.add_argument(flag, **options)
+    opts = cli.parse_args()
+    if opts.list_devices:
+        list_audio_devices()
+    else:
+        await SimpleVoiceClient(opts.url, opts.sample_rate).run(opts.text)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())  # script entry point: run main() to completion