Add tts/text output schema

2026-02-11 09:50:46 +08:00
parent 2d7fc2b700
commit 15523d9ec2
13 changed files with 219 additions and 50 deletions
--- a/engine/docs/ws_v1_schema.md
+++ b/engine/docs/ws_v1_schema.md
@@ -53,6 +53,9 @@ Rules:
  },
  "metadata": {
    "client": "web-debug",
+    "output": {
+      "mode": "audio"
+    },
    "systemPrompt": "You are concise.",
    "greeting": "Hi, how can I help?",
    "services": {
@@ -70,6 +73,7 @@ Rules:
        "minAudioMs": 300
      },
      "tts": {
+        "enabled": true,
        "provider": "siliconflow",
        "model": "FunAudioLLM/CosyVoice2-0.5B",
        "apiKey": "sf-...",
@@ -83,6 +87,10 @@ Rules:

 `metadata.services` is optional. If omitted, server defaults to environment configuration.

+Text-only mode:
+- Set either `metadata.output.mode = "text"` or `metadata.services.tts.enabled = false`; either setting alone enables text-only mode.
+- In this mode, the server still sends `assistant.response.delta` and `assistant.response.final`, but does not emit audio frames or the `output.audio.start` / `output.audio.end` events.
+
 ### `input.text`

 ```json