minimal ws echo server

2026-01-28 11:51:46 +08:00
parent 14013608a9
commit 4cb267a288
2 changed files with 297 additions and 0 deletions
--- a/main.py
+++ b/main.py
@@ -0,0 +1,137 @@
 """
 Step 1: Minimal WebSocket Echo Server
 This is the simplest possible WebSocket audio server.
 It accepts connections and echoes back events.
 What you'll learn:
 - How to create a FastAPI WebSocket endpoint
 - How to handle mixed text/binary frames
 - Basic event sending
 Test with:
    python main.py
    python test_client.py
 """
 import asyncio
 import json
 import uuid
 from fastapi import FastAPI, WebSocket
 from loguru import logger
 # Configure logging
 logger.remove()
 logger.add(lambda msg: print(msg, end=""), level="INFO", format="<green>{time:HH:mm:ss}</green> | {level} | {message}")
 # Create FastAPI app
 app = FastAPI(title="Voice Gateway - Step 1")
@app.get("/health")
 async def health_check():
    """Health check endpoint."""
    return {"status": "healthy", "step": "1_minimal_echo"}
@app.websocket("/ws")
 async def websocket_endpoint(websocket: WebSocket):
    """
    WebSocket endpoint for audio streaming.
    This is a minimal echo server that:
    1. Accepts WebSocket connections
    2. Sends a welcome event
    3. Receives text commands and binary audio
    4. Echoes speaking events back
    """
    await websocket.accept()
    # Generate unique session ID
    session_id = str(uuid.uuid4())
    logger.info(f"[{session_id}] Client connected")
    try:
        # Send welcome event (answer)
        await websocket.send_json({
            "event": "answer",
            "trackId": session_id,
            "timestamp": _get_timestamp_ms()
        })
        logger.info(f"[{session_id}] Sent answer event")
        # Message receive loop
        while True:
            message = await websocket.receive()
            # Handle binary audio data
            if "bytes" in message:
                audio_bytes = message["bytes"]
                logger.info(f"[{session_id}] Received audio: {len(audio_bytes)} bytes")
                # Send speaking event (echo back)
                await websocket.send_json({
                    "event": "speaking",
                    "trackId": session_id,
                    "timestamp": _get_timestamp_ms(),
                    "startTime": _get_timestamp_ms()
                })
            # Handle text commands
            elif "text" in message:
                text_data = message["text"]
                logger.info(f"[{session_id}] Received text: {text_data[:100]}...")
                try:
                    data = json.loads(text_data)
                    command = data.get("command", "unknown")
                    logger.info(f"[{session_id}] Command: {command}")
                    # Handle basic commands
                    if command == "invite":
                        await websocket.send_json({
                            "event": "answer",
                            "trackId": session_id,
                            "timestamp": _get_timestamp_ms()
                        })
                        logger.info(f"[{session_id}] Responded to invite")
                    elif command == "hangup":
                        logger.info(f"[{session_id}] Hangup requested")
                        break
                    elif command == "ping":
                        await websocket.send_json({
                            "event": "pong",
                            "timestamp": _get_timestamp_ms()
                        })
                except json.JSONDecodeError as e:
                    logger.error(f"[{session_id}] Invalid JSON: {e}")
    except Exception as e:
        logger.error(f"[{session_id}] Error: {e}")
    finally:
        logger.info(f"[{session_id}] Connection closed")
 def _get_timestamp_ms() -> int:
    """Get current timestamp in milliseconds."""
    import time
    return int(time.time() * 1000)
 if __name__ == "__main__":
    import uvicorn
    logger.info("🚀 Starting Step 1: Minimal WebSocket Echo Server")
    logger.info("📡 Server: ws://localhost:8000/ws")
    logger.info("🩺 Health: http://localhost:8000/health")
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8000,
        log_level="info"
    )
--- a/test_client.py
+++ b/test_client.py
@@ -0,0 +1,160 @@
 """
 WebSocket Test Client
 Tests the WebSocket server with sine wave audio generation.
 Usage:
    python test_client.py
    python test_client.py --url ws://localhost:8000/ws
 """
 import asyncio
 import aiohttp
 import json
 import struct
 import math
 import argparse
 from datetime import datetime
 # Configuration
 SERVER_URL = "ws://localhost:8000/ws"
 SAMPLE_RATE = 16000
 FREQUENCY = 440  # 440Hz sine wave
 CHUNK_DURATION_MS = 20
 CHUNK_SIZE_BYTES = int(SAMPLE_RATE * 2 * (CHUNK_DURATION_MS / 1000.0))  # 640 bytes
 def generate_sine_wave(duration_ms=1000):
    """
    Generate sine wave audio data.
    Format: 16kHz, mono, 16-bit PCM
    """
    num_samples = int(SAMPLE_RATE * (duration_ms / 1000.0))
    audio_data = bytearray()
    for x in range(num_samples):
        # Generate sine wave sample
        value = int(32767.0 * math.sin(2 * math.pi * FREQUENCY * x / SAMPLE_RATE))
        # Pack as little-endian 16-bit signed integer
        audio_data.extend(struct.pack('<h', value))
    return audio_data
 async def receive_loop(ws, session_id):
    """
    Listen for incoming messages from the server.
    """
    print("👂 Listening for server responses...")
    async for msg in ws:
        timestamp = datetime.now().strftime("%H:%M:%S")
        if msg.type == aiohttp.WSMsgType.TEXT:
            try:
                data = json.loads(msg.data)
                event_type = data.get('event', 'Unknown')
                print(f"[{timestamp}] 📨 Event: {event_type}")
                print(f"           {json.dumps(data, indent=2)}")
            except json.JSONDecodeError:
                print(f"[{timestamp}] 📨 Text: {msg.data[:100]}...")
        elif msg.type == aiohttp.WSMsgType.BINARY:
            # Received audio chunk back
            print(f"[{timestamp}] 🔊 Audio: {len(msg.data)} bytes")
        elif msg.type == aiohttp.WSMsgType.CLOSED:
            print(f"\n[{timestamp}] ❌ Connection closed")
            break
        elif msg.type == aiohttp.WSMsgType.ERROR:
            print(f"\n[{timestamp}] ⚠️ WebSocket error")
            break
 async def send_audio_loop(ws):
    """
    Stream sine wave audio to the server.
    """
    print("🎙️  Starting audio stream (sine wave)...")
    # Generate 5 seconds of audio
    audio_buffer = generate_sine_wave(5000)
    cursor = 0
    while cursor < len(audio_buffer):
        chunk = audio_buffer[cursor:cursor + CHUNK_SIZE_BYTES]
        if not chunk:
            break
        await ws.send_bytes(chunk)
        print(f"📤 Sent audio chunk: {len(chunk)} bytes", end="\r")
        cursor += len(chunk)
        # Sleep to simulate real-time (20ms per chunk)
        await asyncio.sleep(CHUNK_DURATION_MS / 1000.0)
    print("\n✅ Finished streaming audio")
 async def run_client(url):
    """
    Run the WebSocket test client.
    """
    session = aiohttp.ClientSession()
    try:
        print(f"🔌 Connecting to {url}...")
        async with session.ws_connect(url) as ws:
            print("✅ Connected!")
            print()
            # Send invite command
            invite_cmd = {
                "command": "invite",
                "option": {
                    "codec": "pcm",
                    "samplerate": SAMPLE_RATE
                }
            }
            await ws.send_json(invite_cmd)
            print("📤 Sent invite command")
            print()
            # Send a ping command
            ping_cmd = {"command": "ping"}
            await ws.send_json(ping_cmd)
            print("📤 Sent ping command")
            print()
            # Wait a moment for responses
            await asyncio.sleep(1)
            # Run audio streaming and receiving in parallel
            await asyncio.gather(
                receive_loop(ws, None),
                send_audio_loop(ws)
            )
    except aiohttp.ClientConnectorError:
        print(f"❌ Connection failed. Is the server running at {url}?")
        print(f"   Start server with: python main.py")
    except Exception as e:
        print(f"❌ Error: {e}")
    finally:
        await session.close()
 if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="WebSocket Audio Test Client")
    parser.add_argument("--url", default=SERVER_URL, help="WebSocket URL")
    args = parser.parse_args()
    try:
        asyncio.run(run_client(args.url))
    except KeyboardInterrupt:
        print("\n👋 Client stopped.")