From 4cb267a288d78ad4146422da3f3fa6ce00deb1ec Mon Sep 17 00:00:00 2001
From: Xin Wang <wangx184@chinatelecom.cn>
Date: Wed, 28 Jan 2026 11:51:46 +0800
Subject: [PATCH] minimal ws echo server

---
 main.py        | 137 ++++++++++++++++++++++++++++++++++++++++++
 test_client.py | 160 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 297 insertions(+)
 create mode 100644 main.py
 create mode 100644 test_client.py
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..1967b0c
--- /dev/null
+++ b/main.py
@@ -0,0 +1,137 @@
+"""
+Step 1: Minimal WebSocket Echo Server
+
+This is the simplest possible WebSocket audio server.
+It accepts connections and echoes back events.
+
+What you'll learn:
+- How to create a FastAPI WebSocket endpoint
+- How to handle mixed text/binary frames
+- Basic event sending
+
+Test with:
+    python main.py
+    python test_client.py
+"""
+
+import asyncio
+import json
+import uuid
+from fastapi import FastAPI, WebSocket
+from loguru import logger
+
+# Configure logging
+logger.remove()
+logger.add(lambda msg: print(msg, end=""), level="INFO", format="<green>{time:HH:mm:ss}</green> | {level} | {message}")
+
+# Create FastAPI app
+app = FastAPI(title="Voice Gateway - Step 1")
+
+
+@app.get("/health")
+async def health_check():
+    """Health check endpoint."""
+    return {"status": "healthy", "step": "1_minimal_echo"}
+
+
+@app.websocket("/ws")
+async def websocket_endpoint(websocket: WebSocket):
+    """
+    WebSocket endpoint for audio streaming.
+
+    This is a minimal echo server that:
+    1. Accepts WebSocket connections
+    2. Sends a welcome event
+    3. Receives text commands and binary audio
+    4. Echoes speaking events back
+    """
+    await websocket.accept()
+
+    # Generate unique session ID
+    session_id = str(uuid.uuid4())
+    logger.info(f"[{session_id}] Client connected")
+
+    try:
+        # Send welcome event (answer)
+        await websocket.send_json({
+            "event": "answer",
+            "trackId": session_id,
+            "timestamp": _get_timestamp_ms()
+        })
+        logger.info(f"[{session_id}] Sent answer event")
+
+        # Message receive loop
+        while True:
+            message = await websocket.receive()
+
+            # Handle binary audio data
+            if "bytes" in message:
+                audio_bytes = message["bytes"]
+                logger.info(f"[{session_id}] Received audio: {len(audio_bytes)} bytes")
+
+                # Send speaking event (echo back)
+                await websocket.send_json({
+                    "event": "speaking",
+                    "trackId": session_id,
+                    "timestamp": _get_timestamp_ms(),
+                    "startTime": _get_timestamp_ms()
+                })
+
+            # Handle text commands
+            elif "text" in message:
+                text_data = message["text"]
+                logger.info(f"[{session_id}] Received text: {text_data[:100]}...")
+
+                try:
+                    data = json.loads(text_data)
+                    command = data.get("command", "unknown")
+                    logger.info(f"[{session_id}] Command: {command}")
+
+                    # Handle basic commands
+                    if command == "invite":
+                        await websocket.send_json({
+                            "event": "answer",
+                            "trackId": session_id,
+                            "timestamp": _get_timestamp_ms()
+                        })
+                        logger.info(f"[{session_id}] Responded to invite")
+
+                    elif command == "hangup":
+                        logger.info(f"[{session_id}] Hangup requested")
+                        break
+
+                    elif command == "ping":
+                        await websocket.send_json({
+                            "event": "pong",
+                            "timestamp": _get_timestamp_ms()
+                        })
+
+                except json.JSONDecodeError as e:
+                    logger.error(f"[{session_id}] Invalid JSON: {e}")
+
+    except Exception as e:
+        logger.error(f"[{session_id}] Error: {e}")
+
+    finally:
+        logger.info(f"[{session_id}] Connection closed")
+
+
+def _get_timestamp_ms() -> int:
+    """Get current timestamp in milliseconds."""
+    import time
+    return int(time.time() * 1000)
+
+
+if __name__ == "__main__":
+    import uvicorn
+
+    logger.info("🚀 Starting Step 1: Minimal WebSocket Echo Server")
+    logger.info("📡 Server: ws://localhost:8000/ws")
+    logger.info("🩺 Health: http://localhost:8000/health")
+
+    uvicorn.run(
+        app,
+        host="0.0.0.0",
+        port=8000,
+        log_level="info"
+    )
diff --git a/test_client.py b/test_client.py
new file mode 100644
index 0000000..f2a326f
--- /dev/null
+++ b/test_client.py
@@ -0,0 +1,160 @@
+"""
+WebSocket Test Client
+
+Tests the WebSocket server with sine wave audio generation.
+
+Usage:
+    python test_client.py
+    python test_client.py --url ws://localhost:8000/ws
+"""
+
+import asyncio
+import aiohttp
+import json
+import struct
+import math
+import argparse
+from datetime import datetime
+
+# Configuration
+SERVER_URL = "ws://localhost:8000/ws"
+SAMPLE_RATE = 16000
+FREQUENCY = 440  # 440Hz sine wave
+CHUNK_DURATION_MS = 20
+CHUNK_SIZE_BYTES = int(SAMPLE_RATE * 2 * (CHUNK_DURATION_MS / 1000.0))  # 640 bytes
+
+
+def generate_sine_wave(duration_ms=1000):
+    """
+    Generate sine wave audio data.
+
+    Format: 16kHz, mono, 16-bit PCM
+    """
+    num_samples = int(SAMPLE_RATE * (duration_ms / 1000.0))
+    audio_data = bytearray()
+
+    for x in range(num_samples):
+        # Generate sine wave sample
+        value = int(32767.0 * math.sin(2 * math.pi * FREQUENCY * x / SAMPLE_RATE))
+        # Pack as little-endian 16-bit signed integer
+        audio_data.extend(struct.pack('<h', value))
+
+    return audio_data
+
+
+async def receive_loop(ws, session_id):
+    """
+    Listen for incoming messages from the server.
+    """
+    print("👂 Listening for server responses...")
+    async for msg in ws:
+        timestamp = datetime.now().strftime("%H:%M:%S")
+
+        if msg.type == aiohttp.WSMsgType.TEXT:
+            try:
+                data = json.loads(msg.data)
+                event_type = data.get('event', 'Unknown')
+                print(f"[{timestamp}] 📨 Event: {event_type}")
+                print(f"           {json.dumps(data, indent=2)}")
+            except json.JSONDecodeError:
+                print(f"[{timestamp}] 📨 Text: {msg.data[:100]}...")
+
+        elif msg.type == aiohttp.WSMsgType.BINARY:
+            # Received audio chunk back
+            print(f"[{timestamp}] 🔊 Audio: {len(msg.data)} bytes")
+
+        elif msg.type == aiohttp.WSMsgType.CLOSED:
+            print(f"\n[{timestamp}] ❌ Connection closed")
+            break
+
+        elif msg.type == aiohttp.WSMsgType.ERROR:
+            print(f"\n[{timestamp}] ⚠️ WebSocket error")
+            break
+
+
+async def send_audio_loop(ws):
+    """
+    Stream sine wave audio to the server.
+    """
+    print("🎙️  Starting audio stream (sine wave)...")
+
+    # Generate 5 seconds of audio
+    audio_buffer = generate_sine_wave(5000)
+    cursor = 0
+
+    while cursor < len(audio_buffer):
+        chunk = audio_buffer[cursor:cursor + CHUNK_SIZE_BYTES]
+        if not chunk:
+            break
+
+        await ws.send_bytes(chunk)
+        print(f"📤 Sent audio chunk: {len(chunk)} bytes", end="\r")
+
+        cursor += len(chunk)
+
+        # Sleep to simulate real-time (20ms per chunk)
+        await asyncio.sleep(CHUNK_DURATION_MS / 1000.0)
+
+    print("\n✅ Finished streaming audio")
+
+
+async def run_client(url):
+    """
+    Run the WebSocket test client.
+    """
+    session = aiohttp.ClientSession()
+
+    try:
+        print(f"🔌 Connecting to {url}...")
+
+        async with session.ws_connect(url) as ws:
+            print("✅ Connected!")
+            print()
+
+            # Send invite command
+            invite_cmd = {
+                "command": "invite",
+                "option": {
+                    "codec": "pcm",
+                    "samplerate": SAMPLE_RATE
+                }
+            }
+            await ws.send_json(invite_cmd)
+            print("📤 Sent invite command")
+            print()
+
+            # Send a ping command
+            ping_cmd = {"command": "ping"}
+            await ws.send_json(ping_cmd)
+            print("📤 Sent ping command")
+            print()
+
+            # Wait a moment for responses
+            await asyncio.sleep(1)
+
+            # Run audio streaming and receiving in parallel
+            await asyncio.gather(
+                receive_loop(ws, None),
+                send_audio_loop(ws)
+            )
+
+    except aiohttp.ClientConnectorError:
+        print(f"❌ Connection failed. Is the server running at {url}?")
+        print(f"   Start server with: python main.py")
+
+    except Exception as e:
+        print(f"❌ Error: {e}")
+
+    finally:
+        await session.close()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="WebSocket Audio Test Client")
+    parser.add_argument("--url", default=SERVER_URL, help="WebSocket URL")
+    args = parser.parse_args()
+
+    try:
+        asyncio.run(run_client(args.url))
+    except KeyboardInterrupt:
+        print("\n👋 Client stopped.")