Enhance WebSocket session configuration by introducing an optional config.resolved event, which provides a public snapshot of the session's configuration. Update the API reference documentation to clarify the conditions under which this event is emitted and the details it includes. Modify session management to respect the new setting for emitting configuration details, ensuring sensitive information remains secure. Update tests to validate the new behavior and ensure compliance with the updated configuration schema.

This commit is contained in:
Xin Wang
2026-03-01 23:08:44 +08:00
parent 2418df80e5
commit 3643431565
6 changed files with 165 additions and 58 deletions

View File

@@ -23,7 +23,7 @@ ws://<host>/ws?assistant_id=<assistant_id>
``` ```
Client -> session.start Client -> session.start
Server <- session.started Server <- session.started
Server <- config.resolved Server <- (optional) config.resolved
Client -> (binary pcm frames...) Client -> (binary pcm frames...)
Server <- input.speech_started / transcript.delta / transcript.final Server <- input.speech_started / transcript.delta / transcript.final
Server <- assistant.response.delta / assistant.response.final Server <- assistant.response.delta / assistant.response.final
@@ -237,7 +237,7 @@ Server <- session.stopped
|---------|------|---------| |---------|------|---------|
| `audio_in` | ASR/VAD 输入侧事件 | `input.*`, `transcript.*` | | `audio_in` | ASR/VAD 输入侧事件 | `input.*`, `transcript.*` |
| `audio_out` | 助手输出侧事件 | `assistant.*`, `output.audio.*`, `response.interrupted`, `metrics.ttfb` | | `audio_out` | 助手输出侧事件 | `assistant.*`, `output.audio.*`, `response.interrupted`, `metrics.ttfb` |
| `control` | 会话控制事件 | `session.*`, `error`, `config.resolved`, `heartbeat` | | `control` | 会话控制事件 | `session.*`, `error`, `heartbeat`, `config.resolved`(可选) |
--- ---
@@ -284,7 +284,8 @@ Server <- session.stopped
#### `config.resolved` #### `config.resolved`
服务端最终配置快照,在 `session.started` 后立即发送 服务端返回的**公开配置快照**
默认不发送(SaaS 公网模式建议关闭);仅在 `WS_EMIT_CONFIG_RESOLVED=true` 时发送。
```json ```json
{ {
@@ -294,27 +295,19 @@ Server <- session.stopped
"seq": 2, "seq": 2,
"trackId": "control", "trackId": "control",
"config": { "config": {
"assistantId": "asst_abc123", "channel": "web_debug",
"configVersionId": "ver_xyz789",
"output": { "output": {
"mode": "audio" "mode": "audio"
}, },
"services": { "tools": {
"llm": { "enabled": true,
"provider": "openai", "count": 2
"model": "gpt-4"
},
"asr": {
"provider": "sensevoice",
"model": "paraformer"
},
"tts": {
"provider": "aliyun",
"model": "xiaoyun",
"enabled": true
}
}, },
"tools": ["weather", "calculator"] "tracks": {
"audio_in": "audio_in",
"audio_out": "audio_out",
"control": "control"
}
} }
} }
``` ```
@@ -322,13 +315,18 @@ Server <- session.stopped
| 字段 | 类型 | 说明 | | 字段 | 类型 | 说明 |
|---|---|---| |---|---|---|
| `trackId` | string | 固定为 `"control"` | | `trackId` | string | 固定为 `"control"` |
| `config` | object | 已解析的运行时配置 | | `config` | object | SaaS 安全的公开配置快照 |
| `config.assistantId` | string | 助手 ID | | `config.channel` | string | 回显 `session.start.metadata.channel`(如提供) |
| `config.configVersionId` | string | 配置版本 ID |
| `config.output` | object | 输出配置 | | `config.output` | object | 输出配置 |
| `config.output.mode` | string | 输出模式:`"audio"` / `"text"` | | `config.output.mode` | string | 输出模式:`"audio"` / `"text"` |
| `config.services` | object | 服务配置(不包含密钥) | | `config.tools.enabled` | boolean | 是否启用工具能力 |
| `config.tools` | array | 可用工具列表 | | `config.tools.count` | number | 可用工具数量(不暴露工具清单) |
| `config.tracks` | object | 可用轨道列表 |
**不会返回以下内部字段**:
- `assistantId` / `appId` / `configVersionId`
- `services`(provider/model/baseUrl 等)
- 系统提示词原文及其它内部编排细节
--- ---

View File

@@ -493,6 +493,10 @@ class Settings(BaseSettings):
inactivity_timeout_sec: int = Field(default=60, description="Close connection after no message from client (seconds)") inactivity_timeout_sec: int = Field(default=60, description="Close connection after no message from client (seconds)")
heartbeat_interval_sec: int = Field(default=50, description="Send heartBeat event to client every N seconds") heartbeat_interval_sec: int = Field(default=50, description="Send heartBeat event to client every N seconds")
ws_protocol_version: str = Field(default="v1", description="Public WS protocol version") ws_protocol_version: str = Field(default="v1", description="Public WS protocol version")
ws_emit_config_resolved: bool = Field(
default=False,
description="Emit config.resolved after session.started (debug/internal use; disabled for public SaaS by default)",
)
# Backend bridge configuration (for call/transcript persistence) # Backend bridge configuration (for call/transcript persistence)
backend_mode: str = Field( backend_mode: str = Field(

View File

@@ -1,7 +1,6 @@
"""Session management for active calls.""" """Session management for active calls."""
import asyncio import asyncio
import hashlib
import json import json
import re import re
import time import time
@@ -383,15 +382,18 @@ class Session:
audio=message.audio.model_dump() if message.audio else {}, audio=message.audio.model_dump() if message.audio else {},
) )
) )
await self._send_event( if settings.ws_emit_config_resolved:
ev( await self._send_event(
"config.resolved", ev(
trackId=self.TRACK_CONTROL, "config.resolved",
config=self._build_config_resolved(metadata), trackId=self.TRACK_CONTROL,
config=self._build_config_resolved(metadata),
)
) )
) else:
logger.debug("Session {} skipped config.resolved (ws_emit_config_resolved=false)", self.id)
# Emit opener only after frontend has received session.started/config events. # Emit opener only after frontend has received session.started (and optional config event).
await self.pipeline.emit_initial_greeting() await self.pipeline.emit_initial_greeting()
async def _handle_session_stop(self, reason: Optional[str]) -> None: async def _handle_session_stop(self, reason: Optional[str]) -> None:
@@ -1118,25 +1120,36 @@ class Session:
return sanitized, None return sanitized, None
def _build_config_resolved(self, metadata: Dict[str, Any]) -> Dict[str, Any]: def _build_config_resolved(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
"""Build public resolved config payload (secrets removed).""" """Build public resolved config payload (SaaS-safe, no internal runtime details)."""
system_prompt = str(metadata.get("systemPrompt") or self.pipeline.conversation.system_prompt or "")
prompt_hash = hashlib.sha256(system_prompt.encode("utf-8")).hexdigest() if system_prompt else None
runtime = self.pipeline.resolved_runtime_config() runtime = self.pipeline.resolved_runtime_config()
runtime_output = runtime.get("output", {}) if isinstance(runtime, dict) else {}
output_mode = str(runtime_output.get("mode") or "").strip().lower() if isinstance(runtime_output, dict) else ""
if output_mode not in {"audio", "text"}:
output_mode = "audio"
return { tools_allowlist: List[str] = []
"appId": metadata.get("assistantId"), runtime_tools = runtime.get("tools", {}) if isinstance(runtime, dict) else {}
"channel": metadata.get("channel"), if isinstance(runtime_tools, dict):
"configVersionId": metadata.get("configVersionId") or metadata.get("config_version_id"), allowlist = runtime_tools.get("allowlist", [])
"prompt": {"sha256": prompt_hash}, if isinstance(allowlist, list):
"output": runtime.get("output", {}), tools_allowlist = [str(item) for item in allowlist if item is not None and str(item).strip()]
"services": runtime.get("services", {}),
"tools": runtime.get("tools", {}), resolved: Dict[str, Any] = {
"output": {"mode": output_mode},
"tools": {
"enabled": bool(tools_allowlist),
"count": len(tools_allowlist),
},
"tracks": { "tracks": {
"audio_in": self.TRACK_AUDIO_IN, "audio_in": self.TRACK_AUDIO_IN,
"audio_out": self.TRACK_AUDIO_OUT, "audio_out": self.TRACK_AUDIO_OUT,
"control": self.TRACK_CONTROL, "control": self.TRACK_CONTROL,
}, },
} }
if metadata.get("channel") is not None:
resolved["channel"] = metadata.get("channel")
return resolved
def _extract_json_obj(self, text: str) -> Optional[Dict[str, Any]]: def _extract_json_obj(self, text: str) -> Optional[Dict[str, Any]]:
"""Best-effort extraction of a JSON object from freeform text.""" """Best-effort extraction of a JSON object from freeform text."""

View File

@@ -156,8 +156,9 @@ Common events:
- Fields: `sessionId`, `trackId`, `tracks`, `audio` - Fields: `sessionId`, `trackId`, `tracks`, `audio`
- `config.resolved` - `config.resolved`
- Fields: `sessionId`, `trackId`, `config` - Fields: `sessionId`, `trackId`, `config`
- Sent immediately after `session.started`. - Optional debug event. Disabled by default (`ws_emit_config_resolved=false`).
- Contains effective model/voice/output/tool allowlist/prompt hash, and never includes secrets. - `config` is SaaS-safe and public-only: `channel` (if provided), `output.mode`, `tools.enabled`, `tools.count`, `tracks`.
- Must not expose internal IDs or runtime internals (`assistantId/appId/configVersionId/services/provider/model/baseUrl/systemPrompt`).
- `session.stopped` - `session.stopped`
- Fields: `sessionId`, `reason` - Fields: `sessionId`, `reason`
- `heartbeat` - `heartbeat`
@@ -196,7 +197,7 @@ Common events:
Track IDs (MVP fixed values): Track IDs (MVP fixed values):
- `audio_in`: ASR/VAD input-side events (`input.*`, `transcript.*`) - `audio_in`: ASR/VAD input-side events (`input.*`, `transcript.*`)
- `audio_out`: assistant output-side events (`assistant.*`, `output.audio.*`, `response.interrupted`, `metrics.ttfb`) - `audio_out`: assistant output-side events (`assistant.*`, `output.audio.*`, `response.interrupted`, `metrics.ttfb`)
- `control`: session/control events (`session.*`, `error`, `config.resolved`) - `control`: session/control events (`session.*`, `error`, optional `config.resolved`)
Correlation IDs (`event.data`): Correlation IDs (`event.data`):
- `turn_id`: one user-assistant interaction turn. - `turn_id`: one user-assistant interaction turn.

View File

@@ -322,15 +322,17 @@
3. `config.resolved` 3. `config.resolved`
- 关键字段: - 关键字段:
- `config.appId`
- `config.channel` - `config.channel`
- `config.configVersionId` - `config.output.mode`
- `config.prompt.sha256` - `config.tools.enabled`
- `config.output` - `config.tools.count`
- `config.services`(去密钥后的有效服务配置)
- `config.tools.allowlist`
- `config.tracks` - `config.tracks`
- 含义:服务端最终生效配置快照,便于前端展示与排错 - 含义:服务端公开配置快照(SaaS 安全),便于前端展示与排错
- 发送策略:可选调试事件,默认关闭(`ws_emit_config_resolved=false`)
- 不应返回:
- `assistantId` / `appId` / `configVersionId`
- `services`(provider/model/baseUrl 等内部运行细节)
- 系统提示词原文及其它内部编排细节
4. `heartbeat` 4. `heartbeat`
- 关键字段:无业务字段(仅 envelope) - 关键字段:无业务字段(仅 envelope)
@@ -512,7 +514,7 @@ Client -> hello
Server <- hello.ack Server <- hello.ack
Client -> session.start Client -> session.start
Server <- session.started Server <- session.started
Server <- config.resolved Server <- (optional) config.resolved
Client -> (binary pcm frames...) Client -> (binary pcm frames...)
Server <- input.speech_started / transcript.delta / transcript.final Server <- input.speech_started / transcript.delta / transcript.final
Server <- assistant.response.delta / assistant.response.final Server <- assistant.response.delta / assistant.response.final

View File

@@ -143,7 +143,9 @@ async def test_handle_session_start_requires_assistant_id_and_closes_transport()
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_handle_session_start_applies_whitelisted_overrides_and_ignores_workflow(): async def test_handle_session_start_applies_whitelisted_overrides_and_ignores_workflow(monkeypatch):
monkeypatch.setattr("core.session.settings.ws_emit_config_resolved", False)
session = Session.__new__(Session) session = Session.__new__(Session)
session.id = "sess_start_ok" session.id = "sess_start_ok"
session.ws_state = WsSessionState.WAIT_START session.ws_state = WsSessionState.WAIT_START
@@ -175,7 +177,7 @@ async def test_handle_session_start_applies_whitelisted_overrides_and_ignores_wo
return { return {
"output": {"mode": "text"}, "output": {"mode": "text"},
"services": {"llm": {"provider": "openai", "model": "gpt-4o-mini"}}, "services": {"llm": {"provider": "openai", "model": "gpt-4o-mini"}},
"tools": {"allowlist": []}, "tools": {"allowlist": ["calculator"]},
} }
session.transport = _Transport() session.transport = _Transport()
@@ -232,7 +234,94 @@ async def test_handle_session_start_applies_whitelisted_overrides_and_ignores_wo
assert session.pipeline.applied["output"]["mode"] == "text" assert session.pipeline.applied["output"]["mode"] == "text"
assert session.pipeline.applied["tools"] == [{"name": "calculator"}] assert session.pipeline.applied["tools"] == [{"name": "calculator"}]
assert not any(str(item.get("type", "")).startswith("workflow.") for item in events) assert not any(str(item.get("type", "")).startswith("workflow.") for item in events)
assert not any(item.get("type") == "config.resolved" for item in events)
@pytest.mark.asyncio
async def test_handle_session_start_emits_config_resolved_when_enabled(monkeypatch):
monkeypatch.setattr("core.session.settings.ws_emit_config_resolved", True)
session = Session.__new__(Session)
session.id = "sess_start_emit_config"
session.ws_state = WsSessionState.WAIT_START
session.state = "created"
session._assistant_id = "assistant_demo"
session.current_track_id = Session.TRACK_CONTROL
session._pipeline_started = False
class _Transport:
async def close(self):
return None
class _Pipeline:
def __init__(self):
self.started = False
self.applied = {}
self.conversation = type("Conversation", (), {"system_prompt": ""})()
async def start(self):
self.started = True
async def emit_initial_greeting(self):
return None
def apply_runtime_overrides(self, metadata):
self.applied = dict(metadata)
def resolved_runtime_config(self):
return {
"output": {"mode": "text"},
"services": {"llm": {"provider": "openai", "model": "gpt-4o-mini"}},
"tools": {"allowlist": ["calculator"]},
}
session.transport = _Transport()
session.pipeline = _Pipeline()
events = []
async def _start_history_bridge(_metadata):
return None
async def _load_server_runtime_metadata(_assistant_id):
return (
{
"assistantId": "assistant_demo",
"configVersionId": "cfg_1",
"systemPrompt": "Base prompt",
"greeting": "Base greeting",
"output": {"mode": "audio"},
},
None,
)
async def _send_event(event):
events.append(event)
async def _send_error(sender, message, code, **kwargs):
raise AssertionError(f"Unexpected error: sender={sender} code={code} message={message} kwargs={kwargs}")
session._start_history_bridge = _start_history_bridge
session._load_server_runtime_metadata = _load_server_runtime_metadata
session._send_event = _send_event
session._send_error = _send_error
await session._handle_session_start(
SessionStartMessage(
type="session.start",
metadata={
"channel": "web_debug",
"overrides": {
"output": {"mode": "text"},
},
},
)
)
config_event = next(item for item in events if item.get("type") == "config.resolved") config_event = next(item for item in events if item.get("type") == "config.resolved")
assert config_event["config"]["appId"] == "assistant_demo" assert "appId" not in config_event["config"]
assert "configVersionId" not in config_event["config"]
assert "services" not in config_event["config"]
assert config_event["config"]["channel"] == "web_debug" assert config_event["config"]["channel"] == "web_debug"
assert config_event["config"]["output"]["mode"] == "text"
assert config_event["config"]["tools"]["enabled"] is True
assert config_event["config"]["tools"]["count"] == 1