Enhance WebSocket session management by requiring assistant_id as a query parameter for connection. Update API reference documentation to reflect changes in message flow and metadata validation rules, including the introduction of whitelists for allowed metadata fields and restrictions on sensitive keys. Refactor client examples to align with the new session initiation process.

2026-03-01 14:10:38 +08:00
parent b4fa664d73
commit 6a46ec69f4
14 changed files with 725 additions and 424 deletions
--- a/engine/examples/wav_client.py
+++ b/engine/examples/wav_client.py
@@ -21,6 +21,7 @@ import sys
 import time
 import wave
 from pathlib import Path
+from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

 try:
    import numpy as np
@@ -57,9 +58,8 @@ class WavFileClient:
        url: str,
        input_file: str,
        output_file: str,
-        app_id: str = "assistant_demo",
+        assistant_id: str = "assistant_demo",
        channel: str = "wav_client",
-        config_version_id: str = "local-dev",
        sample_rate: int = 16000,
        chunk_duration_ms: int = 20,
        wait_time: float = 15.0,
@@ -82,9 +82,8 @@ class WavFileClient:
        self.url = url
        self.input_file = Path(input_file)
        self.output_file = Path(output_file)
-        self.app_id = app_id
+        self.assistant_id = assistant_id
        self.channel = channel
-        self.config_version_id = config_version_id
        self.sample_rate = sample_rate
        self.chunk_duration_ms = chunk_duration_ms
        self.chunk_samples = int(sample_rate * chunk_duration_ms / 1000)
@@ -147,19 +146,21 @@ class WavFileClient:
            if value:
                parts.append(f"{key}={value}")
        return f" [{' '.join(parts)}]" if parts else ""
+
+    def _session_url(self) -> str:
+        """Return self.url with assistant_id set in the query; other (even repeated) parameters are kept."""
+        parts = urlsplit(self.url)
+        pairs = [kv for kv in parse_qsl(parts.query, keep_blank_values=True) if kv[0] != "assistant_id"]
+        return urlunsplit(parts._replace(query=urlencode([*pairs, ("assistant_id", self.assistant_id)])))
    
    async def connect(self) -> None:
        """Connect to WebSocket server."""
-        self.log_event("→", f"Connecting to {self.url}...")
-        self.ws = await websockets.connect(self.url)
+        session_url = self._session_url()
+        self.log_event("→", f"Connecting to {session_url}...")
+        self.ws = await websockets.connect(session_url)
        self.running = True
        self.log_event("←", "Connected!")

-        # WS v1 handshake: hello -> session.start
-        await self.send_command({
-            "type": "hello",
-            "version": "v1",
-        })
        await self.send_command({
            "type": "session.start",
            "audio": {
@@ -168,9 +169,8 @@ class WavFileClient:
                "channels": 1
            },
            "metadata": {
-                "appId": self.app_id,
                "channel": self.channel,
-                "configVersionId": self.config_version_id,
+                "source": "wav_client",
            },
        })
    
@@ -329,9 +329,7 @@ class WavFileClient:
        if self.track_debug:
            print(f"[track-debug] event={event_type} trackId={event.get('trackId')}{ids}")

-        if event_type == "hello.ack":
-            self.log_event("←", f"Handshake acknowledged{ids}")
-        elif event_type == "session.started":
+        if event_type == "session.started":
            self.session_ready = True
            self.log_event("←", f"Session ready!{ids}")
        elif event_type == "config.resolved":
@@ -521,20 +519,15 @@ async def main():
        help="Target sample rate for audio (default: 16000)"
    )
    parser.add_argument(
-        "--app-id",
+        "--assistant-id",
        default="assistant_demo",
-        help="Stable app/assistant identifier for server-side config lookup"
+        help="Assistant identifier used in websocket query parameter"
    )
    parser.add_argument(
        "--channel",
        default="wav_client",
        help="Client channel name"
    )
-    parser.add_argument(
-        "--config-version-id",
-        default="local-dev",
-        help="Optional config version identifier"
-    )
    parser.add_argument(
        "--chunk-duration",
        type=int,
@@ -570,9 +563,8 @@ async def main():
        url=args.url,
        input_file=args.input,
        output_file=args.output,
-        app_id=args.app_id,
+        assistant_id=args.assistant_id,
        channel=args.channel,
-        config_version_id=args.config_version_id,
        sample_rate=args.sample_rate,
        chunk_duration_ms=args.chunk_duration,
        wait_time=args.wait_time,