diff --git a/docs/content/api-reference/websocket.md b/docs/content/api-reference/websocket.md index 28d9fb8..995fd1d 100644 --- a/docs/content/api-reference/websocket.md +++ b/docs/content/api-reference/websocket.md @@ -23,7 +23,7 @@ ws:///ws?assistant_id= ``` Client -> session.start Server <- session.started -Server <- config.resolved +Server <- (optional) config.resolved Client -> (binary pcm frames...) Server <- input.speech_started / transcript.delta / transcript.final Server <- assistant.response.delta / assistant.response.final @@ -237,7 +237,7 @@ Server <- session.stopped |---------|------|---------| | `audio_in` | ASR/VAD 输入侧事件 | `input.*`, `transcript.*` | | `audio_out` | 助手输出侧事件 | `assistant.*`, `output.audio.*`, `response.interrupted`, `metrics.ttfb` | -| `control` | 会话控制事件 | `session.*`, `error`, `config.resolved`, `heartbeat` | +| `control` | 会话控制事件 | `session.*`, `error`, `heartbeat`, `(optional) config.resolved` | --- @@ -284,7 +284,8 @@ Server <- session.stopped #### `config.resolved` -服务端最终配置快照,在 `session.started` 后立即发送。 +服务端返回的**公开配置快照**。 +默认不发送(SaaS 公网模式建议关闭);仅在 `WS_EMIT_CONFIG_RESOLVED=true` 时发送。 ```json { @@ -294,27 +295,19 @@ Server <- session.stopped "seq": 2, "trackId": "control", "config": { - "assistantId": "asst_abc123", - "configVersionId": "ver_xyz789", + "channel": "web_debug", "output": { "mode": "audio" }, - "services": { - "llm": { - "provider": "openai", - "model": "gpt-4" - }, - "asr": { - "provider": "sensevoice", - "model": "paraformer" - }, - "tts": { - "provider": "aliyun", - "model": "xiaoyun", - "enabled": true - } + "tools": { + "enabled": true, + "count": 2 }, - "tools": ["weather", "calculator"] + "tracks": { + "audio_in": "audio_in", + "audio_out": "audio_out", + "control": "control" + } } } ``` @@ -322,13 +315,18 @@ Server <- session.stopped | 字段 | 类型 | 说明 | |---|---|---| | `trackId` | string | 固定为 `"control"` | -| `config` | object | 已解析的运行时配置 | -| `config.assistantId` | string | 助手 ID | -| `config.configVersionId` | string | 配置版本 ID | +| `config` | object | SaaS 安全的公开配置快照 | +| `config.channel` | string | 回显 `session.start.metadata.channel`(如提供) | | `config.output` | object | 输出配置 | | `config.output.mode` | string | 输出模式:`"audio"` / `"text"` | -| `config.services` | object | 服务配置(不包含密钥) | -| `config.tools` | array | 可用工具列表 | +| `config.tools.enabled` | boolean | 是否启用工具能力 | +| `config.tools.count` | number | 可用工具数量(不暴露工具清单) | +| `config.tracks` | object | 可用轨道列表 | + +**不会返回以下内部字段**: +- `assistantId` / `appId` / `configVersionId` +- `services`(provider/model/baseUrl 等) +- 系统提示词原文及其它内部编排细节 --- diff --git a/engine/app/config.py b/engine/app/config.py index c4b3d4f..e81b852 100644 --- a/engine/app/config.py +++ b/engine/app/config.py @@ -493,6 +493,10 @@ class Settings(BaseSettings): inactivity_timeout_sec: int = Field(default=60, description="Close connection after no message from client (seconds)") heartbeat_interval_sec: int = Field(default=50, description="Send heartBeat event to client every N seconds") ws_protocol_version: str = Field(default="v1", description="Public WS protocol version") + ws_emit_config_resolved: bool = Field( + default=False, + description="Emit config.resolved after session.started (debug/internal use; disabled for public SaaS by default)", + ) # Backend bridge configuration (for call/transcript persistence) backend_mode: str = Field( diff --git a/engine/core/session.py b/engine/core/session.py index ac365ce..f7042fc 100644 --- a/engine/core/session.py +++ b/engine/core/session.py @@ -1,7 +1,6 @@ """Session management for active calls.""" import asyncio -import hashlib import json import re import time @@ -383,15 +382,18 @@ class Session: audio=message.audio.model_dump() if message.audio else {}, ) ) - await self._send_event( - ev( - "config.resolved", - trackId=self.TRACK_CONTROL, - config=self._build_config_resolved(metadata), + if settings.ws_emit_config_resolved: + await self._send_event( + ev( + "config.resolved", + trackId=self.TRACK_CONTROL, + config=self._build_config_resolved(metadata), + ) ) - ) + else: + logger.debug("Session {} skipped config.resolved (ws_emit_config_resolved=false)", self.id) - # Emit opener only after frontend has received session.started/config events. + # Emit opener only after frontend has received session.started (and optional config event). await self.pipeline.emit_initial_greeting() async def _handle_session_stop(self, reason: Optional[str]) -> None: @@ -1118,25 +1120,36 @@ class Session: return sanitized, None def _build_config_resolved(self, metadata: Dict[str, Any]) -> Dict[str, Any]: - """Build public resolved config payload (secrets removed).""" - system_prompt = str(metadata.get("systemPrompt") or self.pipeline.conversation.system_prompt or "") - prompt_hash = hashlib.sha256(system_prompt.encode("utf-8")).hexdigest() if system_prompt else None + """Build public resolved config payload (SaaS-safe, no internal runtime details).""" runtime = self.pipeline.resolved_runtime_config() + runtime_output = runtime.get("output", {}) if isinstance(runtime, dict) else {} + output_mode = str(runtime_output.get("mode") or "").strip().lower() if isinstance(runtime_output, dict) else "" + if output_mode not in {"audio", "text"}: + output_mode = "audio" - return { - "appId": metadata.get("assistantId"), - "channel": metadata.get("channel"), - "configVersionId": metadata.get("configVersionId") or metadata.get("config_version_id"), - "prompt": {"sha256": prompt_hash}, - "output": runtime.get("output", {}), - "services": runtime.get("services", {}), - "tools": runtime.get("tools", {}), + tools_allowlist: List[str] = [] + runtime_tools = runtime.get("tools", {}) if isinstance(runtime, dict) else {} + if isinstance(runtime_tools, dict): + allowlist = runtime_tools.get("allowlist", []) + if isinstance(allowlist, list): + tools_allowlist = [str(item) for item in allowlist if item is not None and str(item).strip()] + + resolved: Dict[str, Any] = { + "output": {"mode": output_mode}, + "tools": { + "enabled": bool(tools_allowlist), + "count": len(tools_allowlist), + }, "tracks": { "audio_in": self.TRACK_AUDIO_IN, "audio_out": self.TRACK_AUDIO_OUT, "control": self.TRACK_CONTROL, }, } + if metadata.get("channel") is not None: + resolved["channel"] = metadata.get("channel") + + return resolved def _extract_json_obj(self, text: str) -> Optional[Dict[str, Any]]: """Best-effort extraction of a JSON object from freeform text.""" diff --git a/engine/docs/ws_v1_schema.md b/engine/docs/ws_v1_schema.md index eb9db62..22a9dcf 100644 --- a/engine/docs/ws_v1_schema.md +++ b/engine/docs/ws_v1_schema.md @@ -156,8 +156,9 @@ Common events: - Fields: `sessionId`, `trackId`, `tracks`, `audio` - `config.resolved` - Fields: `sessionId`, `trackId`, `config` - - Sent immediately after `session.started`. - - Contains effective model/voice/output/tool allowlist/prompt hash, and never includes secrets. + - Optional debug event. Disabled by default (`ws_emit_config_resolved=false`). + - `config` is SaaS-safe and public-only: `channel` (if provided), `output.mode`, `tools.enabled`, `tools.count`, `tracks`. + - Must not expose internal IDs or runtime internals (`assistantId/appId/configVersionId/services/provider/model/baseUrl/systemPrompt`). - `session.stopped` - Fields: `sessionId`, `reason` - `heartbeat` @@ -196,7 +197,7 @@ Common events: Track IDs (MVP fixed values): - `audio_in`: ASR/VAD input-side events (`input.*`, `transcript.*`) - `audio_out`: assistant output-side events (`assistant.*`, `output.audio.*`, `response.interrupted`, `metrics.ttfb`) -- `control`: session/control events (`session.*`, `error`, `config.resolved`) +- `control`: session/control events (`session.*`, `error`, optional `config.resolved`) Correlation IDs (`event.data`): - `turn_id`: one user-assistant interaction turn. diff --git a/engine/docs/ws_v1_schema_zh.md b/engine/docs/ws_v1_schema_zh.md index f5be1f9..3313b73 100644 --- a/engine/docs/ws_v1_schema_zh.md +++ b/engine/docs/ws_v1_schema_zh.md @@ -322,15 +322,17 @@ 3. `config.resolved` - 关键字段: - - `config.appId` - `config.channel` - - `config.configVersionId` - - `config.prompt.sha256` - - `config.output` - - `config.services`(去密钥后的有效服务配置) - - `config.tools.allowlist` + - `config.output.mode` + - `config.tools.enabled` + - `config.tools.count` - `config.tracks` -- 含义:服务端最终生效配置快照,便于前端展示与排错 +- 含义:服务端公开配置快照(SaaS 安全),便于前端展示与排错 +- 发送策略:可选调试事件,默认关闭(`ws_emit_config_resolved=false`) +- 不应返回: + - `assistantId` / `appId` / `configVersionId` + - `services`(provider/model/baseUrl 等内部运行细节) + - 系统提示词原文及其它内部编排细节 4. `heartbeat` - 关键字段:无业务字段(仅 envelope) @@ -512,7 +514,7 @@ Client -> hello Server <- hello.ack Client -> session.start Server <- session.started -Server <- config.resolved +Server <- (optional) config.resolved Client -> (binary pcm frames...) Server <- input.speech_started / transcript.delta / transcript.final Server <- assistant.response.delta / assistant.response.final diff --git a/engine/tests/test_ws_protocol_session_start.py b/engine/tests/test_ws_protocol_session_start.py index 07ee762..fb2c3f4 100644 --- a/engine/tests/test_ws_protocol_session_start.py +++ b/engine/tests/test_ws_protocol_session_start.py @@ -143,7 +143,9 @@ async def test_handle_session_start_requires_assistant_id_and_closes_transport() @pytest.mark.asyncio -async def test_handle_session_start_applies_whitelisted_overrides_and_ignores_workflow(): +async def test_handle_session_start_applies_whitelisted_overrides_and_ignores_workflow(monkeypatch): + monkeypatch.setattr("core.session.settings.ws_emit_config_resolved", False) + session = Session.__new__(Session) session.id = "sess_start_ok" session.ws_state = WsSessionState.WAIT_START @@ -175,7 +177,7 @@ async def test_handle_session_start_applies_whitelisted_overrides_and_ignores_wo return { "output": {"mode": "text"}, "services": {"llm": {"provider": "openai", "model": "gpt-4o-mini"}}, - "tools": {"allowlist": []}, + "tools": {"allowlist": ["calculator"]}, } session.transport = _Transport() @@ -232,7 +234,94 @@ async def test_handle_session_start_applies_whitelisted_overrides_and_ignores_wo assert session.pipeline.applied["output"]["mode"] == "text" assert session.pipeline.applied["tools"] == [{"name": "calculator"}] assert not any(str(item.get("type", "")).startswith("workflow.") for item in events) + assert not any(item.get("type") == "config.resolved" for item in events) + + +@pytest.mark.asyncio +async def test_handle_session_start_emits_config_resolved_when_enabled(monkeypatch): + monkeypatch.setattr("core.session.settings.ws_emit_config_resolved", True) + + session = Session.__new__(Session) + session.id = "sess_start_emit_config" + session.ws_state = WsSessionState.WAIT_START + session.state = "created" + session._assistant_id = "assistant_demo" + session.current_track_id = Session.TRACK_CONTROL + session._pipeline_started = False + + class _Transport: + async def close(self): + return None + + class _Pipeline: + def __init__(self): + self.started = False + self.applied = {} + self.conversation = type("Conversation", (), {"system_prompt": ""})() + + async def start(self): + self.started = True + + async def emit_initial_greeting(self): + return None + + def apply_runtime_overrides(self, metadata): + self.applied = dict(metadata) + + def resolved_runtime_config(self): + return { + "output": {"mode": "text"}, + "services": {"llm": {"provider": "openai", "model": "gpt-4o-mini"}}, + "tools": {"allowlist": ["calculator"]}, + } + + session.transport = _Transport() + session.pipeline = _Pipeline() + events = [] + + async def _start_history_bridge(_metadata): + return None + + async def _load_server_runtime_metadata(_assistant_id): + return ( + { + "assistantId": "assistant_demo", + "configVersionId": "cfg_1", + "systemPrompt": "Base prompt", + "greeting": "Base greeting", + "output": {"mode": "audio"}, + }, + None, + ) + + async def _send_event(event): + events.append(event) + + async def _send_error(sender, message, code, **kwargs): + raise AssertionError(f"Unexpected error: sender={sender} code={code} message={message} kwargs={kwargs}") + + session._start_history_bridge = _start_history_bridge + session._load_server_runtime_metadata = _load_server_runtime_metadata + session._send_event = _send_event + session._send_error = _send_error + + await session._handle_session_start( + SessionStartMessage( + type="session.start", + metadata={ + "channel": "web_debug", + "overrides": { + "output": {"mode": "text"}, + }, + }, + ) + ) config_event = next(item for item in events if item.get("type") == "config.resolved") - assert config_event["config"]["appId"] == "assistant_demo" + assert "appId" not in config_event["config"] + assert "configVersionId" not in config_event["config"] + assert "services" not in config_event["config"] assert config_event["config"]["channel"] == "web_debug" + assert config_event["config"]["output"]["mode"] == "text" + assert config_event["config"]["tools"]["enabled"] is True + assert config_event["config"]["tools"]["count"] == 1