Support tool config in yaml
This commit is contained in:
@@ -11,9 +11,16 @@ PORT=8000
|
||||
# EXTERNAL_IP=1.2.3.4
|
||||
|
||||
# Backend bridge (optional)
|
||||
# BACKEND_MODE=auto|http|disabled
|
||||
BACKEND_MODE=auto
|
||||
BACKEND_URL=http://127.0.0.1:8100
|
||||
BACKEND_TIMEOUT_SEC=10
|
||||
HISTORY_ENABLED=true
|
||||
HISTORY_DEFAULT_USER_ID=1
|
||||
HISTORY_QUEUE_MAX_SIZE=256
|
||||
HISTORY_RETRY_MAX_ATTEMPTS=2
|
||||
HISTORY_RETRY_BACKOFF_SEC=0.2
|
||||
HISTORY_FINALIZE_DRAIN_TIMEOUT_SEC=1.5
|
||||
|
||||
# Audio
|
||||
SAMPLE_RATE=16000
|
||||
|
||||
@@ -37,6 +37,8 @@ Agent 配置路径优先级
|
||||
- Agent 相关配置是严格模式:YAML 缺少必须项会直接报错,不会回退到 `.env` 或代码默认值。
|
||||
- 如果要引用环境变量,请在 YAML 显式写 `${ENV_VAR}`。
|
||||
- `siliconflow` 独立 section 已移除;请在 `agent.llm / agent.tts / agent.asr` 内通过 `provider`、`api_key`、`api_url`、`model` 配置。
|
||||
- 现在支持在 Agent YAML 中配置 `agent.tools`(列表),用于声明运行时可调用工具。
|
||||
- 工具配置示例见 `config/agents/tools.yaml`。
|
||||
|
||||
## Backend Integration
|
||||
|
||||
|
||||
@@ -95,6 +95,7 @@ _AGENT_SETTING_KEYS = {
|
||||
"duplex_system_prompt",
|
||||
"barge_in_min_duration_ms",
|
||||
"barge_in_silence_tolerance_ms",
|
||||
"tools",
|
||||
}
|
||||
_BASE_REQUIRED_AGENT_SETTING_KEYS = {
|
||||
"vad_type",
|
||||
@@ -239,6 +240,11 @@ def _normalize_agent_overrides(raw: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"Section 'siliconflow' is no longer supported. "
|
||||
"Move provider-specific fields into agent.llm / agent.asr / agent.tts."
|
||||
)
|
||||
if key == "tools":
|
||||
if not isinstance(value, list):
|
||||
raise ValueError("Agent config key 'tools' must be a list")
|
||||
normalized["tools"] = value
|
||||
continue
|
||||
section_map = _AGENT_SECTION_KEY_MAP.get(key)
|
||||
if section_map is None:
|
||||
normalized[key] = value
|
||||
@@ -444,6 +450,10 @@ class Settings(BaseSettings):
|
||||
description="How much silence (ms) is tolerated during potential barge-in before reset"
|
||||
)
|
||||
|
||||
# Optional tool declarations from agent YAML.
|
||||
# Supports OpenAI function schema style entries and/or shorthand string names.
|
||||
tools: List[Any] = Field(default_factory=list, description="Default tool definitions for runtime")
|
||||
|
||||
# Logging
|
||||
log_level: str = Field(default="INFO", description="Logging level")
|
||||
log_format: str = Field(default="json", description="Log format (json or text)")
|
||||
|
||||
73
config/agents/tools.yaml
Normal file
73
config/agents/tools.yaml
Normal file
@@ -0,0 +1,73 @@
|
||||
# Agent behavior configuration with tool declarations.
|
||||
# This profile is an example only.
|
||||
|
||||
agent:
|
||||
vad:
|
||||
type: silero
|
||||
model_path: data/vad/silero_vad.onnx
|
||||
threshold: 0.5
|
||||
min_speech_duration_ms: 100
|
||||
eou_threshold_ms: 800
|
||||
|
||||
llm:
|
||||
# provider: openai | openai_compatible | siliconflow
|
||||
provider: openai_compatible
|
||||
model: deepseek-v3
|
||||
temperature: 0.7
|
||||
api_key: your_llm_api_key
|
||||
api_url: https://api.qnaigc.com/v1
|
||||
|
||||
tts:
|
||||
# provider: edge | openai_compatible | siliconflow
|
||||
provider: openai_compatible
|
||||
api_key: your_tts_api_key
|
||||
api_url: https://api.siliconflow.cn/v1/audio/speech
|
||||
model: FunAudioLLM/CosyVoice2-0.5B
|
||||
voice: anna
|
||||
speed: 1.0
|
||||
|
||||
asr:
|
||||
# provider: buffered | openai_compatible | siliconflow
|
||||
provider: openai_compatible
|
||||
api_key: your_asr_api_key
|
||||
api_url: https://api.siliconflow.cn/v1/audio/transcriptions
|
||||
model: FunAudioLLM/SenseVoiceSmall
|
||||
interim_interval_ms: 500
|
||||
min_audio_ms: 300
|
||||
start_min_speech_ms: 160
|
||||
pre_speech_ms: 240
|
||||
final_tail_ms: 120
|
||||
|
||||
duplex:
|
||||
enabled: true
|
||||
system_prompt: You are a helpful voice assistant with tool-calling support.
|
||||
|
||||
barge_in:
|
||||
min_duration_ms: 200
|
||||
silence_tolerance_ms: 60
|
||||
|
||||
# Tool declarations consumed by the engine at startup.
|
||||
# - String form enables built-in/default tool schema when available.
|
||||
# - Object form provides OpenAI function schema + executor hint.
|
||||
tools:
|
||||
- current_time
|
||||
- calculator
|
||||
- name: weather
|
||||
description: Get weather by city name.
|
||||
parameters:
|
||||
type: object
|
||||
properties:
|
||||
city:
|
||||
type: string
|
||||
description: City name, for example "San Francisco".
|
||||
required: [city]
|
||||
executor: server
|
||||
- name: open_map
|
||||
description: Open map app on the client device.
|
||||
parameters:
|
||||
type: object
|
||||
properties:
|
||||
query:
|
||||
type: string
|
||||
required: [query]
|
||||
executor: client
|
||||
@@ -206,7 +206,8 @@ class DuplexPipeline:
|
||||
self._runtime_barge_in_min_duration_ms: Optional[int] = None
|
||||
self._runtime_knowledge: Dict[str, Any] = {}
|
||||
self._runtime_knowledge_base_id: Optional[str] = None
|
||||
self._runtime_tools: List[Any] = []
|
||||
raw_default_tools = settings.tools if isinstance(settings.tools, list) else []
|
||||
self._runtime_tools: List[Any] = list(raw_default_tools)
|
||||
self._runtime_tool_executor: Dict[str, str] = {}
|
||||
self._pending_tool_waiters: Dict[str, asyncio.Future] = {}
|
||||
self._early_tool_results: Dict[str, Dict[str, Any]] = {}
|
||||
@@ -227,6 +228,8 @@ class DuplexPipeline:
|
||||
self._pending_llm_delta: str = ""
|
||||
self._last_llm_delta_emit_ms: float = 0.0
|
||||
|
||||
self._runtime_tool_executor = self._resolved_tool_executor_map()
|
||||
|
||||
if self._server_tool_executor is None:
|
||||
if self._tool_resource_resolver:
|
||||
async def _executor(call: Dict[str, Any]) -> Dict[str, Any]:
|
||||
@@ -369,7 +372,7 @@ class DuplexPipeline:
|
||||
},
|
||||
},
|
||||
"tools": {
|
||||
"allowlist": sorted(self._runtime_tool_executor.keys()),
|
||||
"allowlist": self._resolved_tool_allowlist(),
|
||||
},
|
||||
"tracks": {
|
||||
"audio_in": self.track_audio_in,
|
||||
@@ -1165,6 +1168,23 @@ class DuplexPipeline:
|
||||
result[name] = executor
|
||||
return result
|
||||
|
||||
def _resolved_tool_allowlist(self) -> List[str]:
|
||||
names: set[str] = set()
|
||||
for item in self._runtime_tools:
|
||||
if isinstance(item, str):
|
||||
name = item.strip()
|
||||
if name:
|
||||
names.add(name)
|
||||
continue
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
fn = item.get("function")
|
||||
if isinstance(fn, dict) and fn.get("name"):
|
||||
names.add(str(fn.get("name")).strip())
|
||||
elif item.get("name"):
|
||||
names.add(str(item.get("name")).strip())
|
||||
return sorted([name for name in names if name])
|
||||
|
||||
def _tool_name(self, tool_call: Dict[str, Any]) -> str:
|
||||
fn = tool_call.get("function")
|
||||
if isinstance(fn, dict):
|
||||
|
||||
@@ -202,3 +202,51 @@ def test_agent_yaml_missing_env_reference_fails(monkeypatch, tmp_path):
|
||||
|
||||
with pytest.raises(ValueError, match="Missing environment variable"):
|
||||
load_settings(argv=["--agent-config", str(file_path)])
|
||||
|
||||
|
||||
def test_agent_yaml_tools_list_is_loaded(monkeypatch, tmp_path):
    """A YAML 'tools' list (shorthand strings plus schema mappings) lands on settings.tools."""
    monkeypatch.chdir(tmp_path)
    file_path = tmp_path / "tools-agent.yaml"
    # NOTE(review): YAML indentation reconstructed from a garbled diff export;
    # confirm against the repo whether 'tools' sits at top level or nests
    # under 'agent:' in _full_agent_yaml()'s output.
    _write_yaml(
        file_path,
        _full_agent_yaml()
        + """

tools:
  - current_time
  - name: weather
    description: Get weather by city.
    parameters:
      type: object
      properties:
        city:
          type: string
      required: [city]
    executor: server
""",
    )

    settings = load_settings(argv=["--agent-config", str(file_path)])

    # Both entry forms must survive loading unchanged: the shorthand string
    # stays a string, the mapping stays a dict with its executor hint intact.
    assert isinstance(settings.tools, list)
    assert settings.tools[0] == "current_time"
    assert settings.tools[1]["name"] == "weather"
    assert settings.tools[1]["executor"] == "server"
||||
|
||||
|
||||
def test_agent_yaml_tools_must_be_list(monkeypatch, tmp_path):
    """A mapping under 'tools' (instead of a list) is rejected with a clear ValueError."""
    monkeypatch.chdir(tmp_path)
    file_path = tmp_path / "bad-tools-agent.yaml"
    # NOTE(review): YAML indentation reconstructed from a garbled diff export —
    # the intent is that 'tools' parses as a mapping, which must be rejected.
    _write_yaml(
        file_path,
        _full_agent_yaml()
        + """

tools:
  weather:
    executor: server
""",
    )

    # Strict mode: no silent coercion of a mapping into a list.
    with pytest.raises(ValueError, match="Agent config key 'tools' must be a list"):
        load_settings(argv=["--agent-config", str(file_path)])
|
||||
|
||||
@@ -92,6 +92,34 @@ def _build_pipeline(monkeypatch, llm_rounds: List[List[LLMStreamEvent]]) -> tupl
|
||||
return pipeline, events
|
||||
|
||||
|
||||
def test_pipeline_uses_default_tools_from_settings(monkeypatch):
    """Pipeline picks up default tool declarations from settings.tools."""
    default_tools = [
        "current_time",
        {
            "name": "weather",
            "description": "Get weather by city",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
            "executor": "server",
        },
    ]
    monkeypatch.setattr("core.duplex_pipeline.settings.tools", default_tools)

    pipeline, _events = _build_pipeline(monkeypatch, [[LLMStreamEvent(type="done")]])

    # The advertised allowlist is the sorted set of declared tool names.
    cfg = pipeline.resolved_runtime_config()
    assert cfg["tools"]["allowlist"] == ["current_time", "weather"]

    # Both the shorthand string and the schema mapping must resolve into
    # OpenAI function-schema entries.
    schemas = pipeline._resolved_tool_schemas()
    schema_names = [
        entry.get("function", {}).get("name")
        for entry in schemas
        if isinstance(entry, dict)
    ]
    assert "current_time" in schema_names
    assert "weather" in schema_names
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_ws_message_parses_tool_call_results():
|
||||
msg = parse_client_message(
|
||||
|
||||
Reference in New Issue
Block a user