minimal ws echo server

This commit is contained in:
Xin Wang
2026-01-28 11:51:46 +08:00
parent 14013608a9
commit 4cb267a288
2 changed files with 297 additions and 0 deletions

137
main.py Normal file
View File

@@ -0,0 +1,137 @@
"""
Step 1: Minimal WebSocket Echo Server
This is the simplest possible WebSocket audio server.
It accepts connections and echoes back events.
What you'll learn:
- How to create a FastAPI WebSocket endpoint
- How to handle mixed text/binary frames
- Basic event sending
Test with:
python main.py
python test_client.py
"""
import asyncio
import json
import uuid
from fastapi import FastAPI, WebSocket
from loguru import logger
# Configure logging
logger.remove()
logger.add(lambda msg: print(msg, end=""), level="INFO", format="<green>{time:HH:mm:ss}</green> | {level} | {message}")
# Create FastAPI app
app = FastAPI(title="Voice Gateway - Step 1")
@app.get("/health")
async def health_check():
"""Health check endpoint."""
return {"status": "healthy", "step": "1_minimal_echo"}
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
"""
WebSocket endpoint for audio streaming.
This is a minimal echo server that:
1. Accepts WebSocket connections
2. Sends a welcome event
3. Receives text commands and binary audio
4. Echoes speaking events back
"""
await websocket.accept()
# Generate unique session ID
session_id = str(uuid.uuid4())
logger.info(f"[{session_id}] Client connected")
try:
# Send welcome event (answer)
await websocket.send_json({
"event": "answer",
"trackId": session_id,
"timestamp": _get_timestamp_ms()
})
logger.info(f"[{session_id}] Sent answer event")
# Message receive loop
while True:
message = await websocket.receive()
# Handle binary audio data
if "bytes" in message:
audio_bytes = message["bytes"]
logger.info(f"[{session_id}] Received audio: {len(audio_bytes)} bytes")
# Send speaking event (echo back)
await websocket.send_json({
"event": "speaking",
"trackId": session_id,
"timestamp": _get_timestamp_ms(),
"startTime": _get_timestamp_ms()
})
# Handle text commands
elif "text" in message:
text_data = message["text"]
logger.info(f"[{session_id}] Received text: {text_data[:100]}...")
try:
data = json.loads(text_data)
command = data.get("command", "unknown")
logger.info(f"[{session_id}] Command: {command}")
# Handle basic commands
if command == "invite":
await websocket.send_json({
"event": "answer",
"trackId": session_id,
"timestamp": _get_timestamp_ms()
})
logger.info(f"[{session_id}] Responded to invite")
elif command == "hangup":
logger.info(f"[{session_id}] Hangup requested")
break
elif command == "ping":
await websocket.send_json({
"event": "pong",
"timestamp": _get_timestamp_ms()
})
except json.JSONDecodeError as e:
logger.error(f"[{session_id}] Invalid JSON: {e}")
except Exception as e:
logger.error(f"[{session_id}] Error: {e}")
finally:
logger.info(f"[{session_id}] Connection closed")
def _get_timestamp_ms() -> int:
"""Get current timestamp in milliseconds."""
import time
return int(time.time() * 1000)
if __name__ == "__main__":
import uvicorn
logger.info("🚀 Starting Step 1: Minimal WebSocket Echo Server")
logger.info("📡 Server: ws://localhost:8000/ws")
logger.info("🩺 Health: http://localhost:8000/health")
uvicorn.run(
app,
host="0.0.0.0",
port=8000,
log_level="info"
)

160
test_client.py Normal file
View File

@@ -0,0 +1,160 @@
"""
WebSocket Test Client
Tests the WebSocket server with sine wave audio generation.
Usage:
python test_client.py
python test_client.py --url ws://localhost:8000/ws
"""
import asyncio
import aiohttp
import json
import struct
import math
import argparse
from datetime import datetime
# Configuration
SERVER_URL = "ws://localhost:8000/ws"
SAMPLE_RATE = 16000
FREQUENCY = 440 # 440Hz sine wave
CHUNK_DURATION_MS = 20
CHUNK_SIZE_BYTES = int(SAMPLE_RATE * 2 * (CHUNK_DURATION_MS / 1000.0)) # 640 bytes
def generate_sine_wave(duration_ms=1000):
"""
Generate sine wave audio data.
Format: 16kHz, mono, 16-bit PCM
"""
num_samples = int(SAMPLE_RATE * (duration_ms / 1000.0))
audio_data = bytearray()
for x in range(num_samples):
# Generate sine wave sample
value = int(32767.0 * math.sin(2 * math.pi * FREQUENCY * x / SAMPLE_RATE))
# Pack as little-endian 16-bit signed integer
audio_data.extend(struct.pack('<h', value))
return audio_data
async def receive_loop(ws, session_id):
"""
Listen for incoming messages from the server.
"""
print("👂 Listening for server responses...")
async for msg in ws:
timestamp = datetime.now().strftime("%H:%M:%S")
if msg.type == aiohttp.WSMsgType.TEXT:
try:
data = json.loads(msg.data)
event_type = data.get('event', 'Unknown')
print(f"[{timestamp}] 📨 Event: {event_type}")
print(f" {json.dumps(data, indent=2)}")
except json.JSONDecodeError:
print(f"[{timestamp}] 📨 Text: {msg.data[:100]}...")
elif msg.type == aiohttp.WSMsgType.BINARY:
# Received audio chunk back
print(f"[{timestamp}] 🔊 Audio: {len(msg.data)} bytes")
elif msg.type == aiohttp.WSMsgType.CLOSED:
print(f"\n[{timestamp}] ❌ Connection closed")
break
elif msg.type == aiohttp.WSMsgType.ERROR:
print(f"\n[{timestamp}] ⚠️ WebSocket error")
break
async def send_audio_loop(ws):
"""
Stream sine wave audio to the server.
"""
print("🎙️ Starting audio stream (sine wave)...")
# Generate 5 seconds of audio
audio_buffer = generate_sine_wave(5000)
cursor = 0
while cursor < len(audio_buffer):
chunk = audio_buffer[cursor:cursor + CHUNK_SIZE_BYTES]
if not chunk:
break
await ws.send_bytes(chunk)
print(f"📤 Sent audio chunk: {len(chunk)} bytes", end="\r")
cursor += len(chunk)
# Sleep to simulate real-time (20ms per chunk)
await asyncio.sleep(CHUNK_DURATION_MS / 1000.0)
print("\n✅ Finished streaming audio")
async def run_client(url):
"""
Run the WebSocket test client.
"""
session = aiohttp.ClientSession()
try:
print(f"🔌 Connecting to {url}...")
async with session.ws_connect(url) as ws:
print("✅ Connected!")
print()
# Send invite command
invite_cmd = {
"command": "invite",
"option": {
"codec": "pcm",
"samplerate": SAMPLE_RATE
}
}
await ws.send_json(invite_cmd)
print("📤 Sent invite command")
print()
# Send a ping command
ping_cmd = {"command": "ping"}
await ws.send_json(ping_cmd)
print("📤 Sent ping command")
print()
# Wait a moment for responses
await asyncio.sleep(1)
# Run audio streaming and receiving in parallel
await asyncio.gather(
receive_loop(ws, None),
send_audio_loop(ws)
)
except aiohttp.ClientConnectorError:
print(f"❌ Connection failed. Is the server running at {url}?")
print(f" Start server with: python main.py")
except Exception as e:
print(f"❌ Error: {e}")
finally:
await session.close()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="WebSocket Audio Test Client")
parser.add_argument("--url", default=SERVER_URL, help="WebSocket URL")
args = parser.parse_args()
try:
asyncio.run(run_client(args.url))
except KeyboardInterrupt:
print("\n👋 Client stopped.")