Enhance WebSocket session management by requiring assistant_id as a query parameter for connection. Update API reference documentation to reflect changes in message flow and metadata validation rules, including the introduction of whitelists for allowed metadata fields and restrictions on sensitive keys. Refactor client examples to align with the new session initiation process.

This commit is contained in:
Xin Wang
2026-03-01 14:10:38 +08:00
parent b4fa664d73
commit 6a46ec69f4
14 changed files with 725 additions and 424 deletions

View File

@@ -15,6 +15,7 @@ import sys
import time
import wave
import io
from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit
try:
import numpy as np
@@ -56,16 +57,14 @@ class SimpleVoiceClient:
self,
url: str,
sample_rate: int = 16000,
app_id: str = "assistant_demo",
assistant_id: str = "assistant_demo",
channel: str = "simple_client",
config_version_id: str = "local-dev",
track_debug: bool = False,
):
self.url = url
self.sample_rate = sample_rate
self.app_id = app_id
self.assistant_id = assistant_id
self.channel = channel
self.config_version_id = config_version_id
self.track_debug = track_debug
self.ws = None
self.running = False
@@ -88,6 +87,12 @@ class SimpleVoiceClient:
# Interrupt handling - discard audio until next trackStart
self._discard_audio = False
def _session_url(self) -> str:
    """Return ``self.url`` with ``assistant_id`` set in its query string.

    Every other query parameter already present in ``self.url`` is kept,
    in its original order, including repeated keys and blank values. An
    existing ``assistant_id`` parameter is overwritten with
    ``self.assistant_id`` at the position of its first occurrence; any
    further occurrences are dropped. When the URL has no ``assistant_id``
    parameter, one is appended at the end.

    Returns:
        The URL string to connect to.
    """
    parts = urlsplit(self.url)

    # Work on the ordered list of pairs rather than a dict: a dict would
    # silently collapse repeated keys such as ``?tag=a&tag=b``.
    query = []
    assistant_id_set = False
    for key, value in parse_qsl(parts.query, keep_blank_values=True):
        if key != "assistant_id":
            query.append((key, value))
        elif not assistant_id_set:
            query.append((key, self.assistant_id))
            assistant_id_set = True
        # Later duplicates of assistant_id are dropped on purpose so the
        # resulting URL carries exactly one value.

    if not assistant_id_set:
        query.append(("assistant_id", self.assistant_id))

    return urlunsplit(
        (parts.scheme, parts.netloc, parts.path, urlencode(query), parts.fragment)
    )
@staticmethod
def _event_ids_suffix(event: dict) -> str:
data = event.get("data") if isinstance(event.get("data"), dict) else {}
@@ -101,16 +106,12 @@ class SimpleVoiceClient:
async def connect(self):
"""Connect to server."""
print(f"Connecting to {self.url}...")
self.ws = await websockets.connect(self.url)
session_url = self._session_url()
print(f"Connecting to {session_url}...")
self.ws = await websockets.connect(session_url)
self.running = True
print("Connected!")
# WS v1 handshake: hello -> session.start
await self.ws.send(json.dumps({
"type": "hello",
"version": "v1",
}))
await self.ws.send(json.dumps({
"type": "session.start",
"audio": {
@@ -119,12 +120,11 @@ class SimpleVoiceClient:
"channels": 1,
},
"metadata": {
"appId": self.app_id,
"channel": self.channel,
"configVersionId": self.config_version_id,
"source": "simple_client",
},
}))
print("-> hello/session.start")
print("-> session.start")
async def send_chat(self, text: str):
"""Send chat message."""
@@ -311,9 +311,8 @@ async def main():
parser.add_argument("--text", help="Send text and play response")
parser.add_argument("--list-devices", action="store_true")
parser.add_argument("--sample-rate", type=int, default=16000)
parser.add_argument("--app-id", default="assistant_demo")
parser.add_argument("--assistant-id", default="assistant_demo")
parser.add_argument("--channel", default="simple_client")
parser.add_argument("--config-version-id", default="local-dev")
parser.add_argument("--track-debug", action="store_true")
args = parser.parse_args()
@@ -325,9 +324,8 @@ async def main():
client = SimpleVoiceClient(
args.url,
args.sample_rate,
app_id=args.app_id,
assistant_id=args.assistant_id,
channel=args.channel,
config_version_id=args.config_version_id,
track_debug=args.track_debug,
)
await client.run(args.text)