Support tool config in yaml

This commit is contained in:
Xin Wang
2026-02-25 17:49:58 +08:00
parent 08319a4cc7
commit da4a77eac7
7 changed files with 190 additions and 2 deletions

View File

@@ -11,9 +11,16 @@ PORT=8000
# EXTERNAL_IP=1.2.3.4 # EXTERNAL_IP=1.2.3.4
# Backend bridge (optional) # Backend bridge (optional)
# BACKEND_MODE=auto|http|disabled
BACKEND_MODE=auto
BACKEND_URL=http://127.0.0.1:8100 BACKEND_URL=http://127.0.0.1:8100
BACKEND_TIMEOUT_SEC=10 BACKEND_TIMEOUT_SEC=10
HISTORY_ENABLED=true
HISTORY_DEFAULT_USER_ID=1 HISTORY_DEFAULT_USER_ID=1
HISTORY_QUEUE_MAX_SIZE=256
HISTORY_RETRY_MAX_ATTEMPTS=2
HISTORY_RETRY_BACKOFF_SEC=0.2
HISTORY_FINALIZE_DRAIN_TIMEOUT_SEC=1.5
# Audio # Audio
SAMPLE_RATE=16000 SAMPLE_RATE=16000

View File

@@ -37,6 +37,8 @@ Agent 配置路径优先级
- Agent 相关配置是严格模式YAML 缺少必须项会直接报错,不会回退到 `.env` 或代码默认值。 - Agent 相关配置是严格模式YAML 缺少必须项会直接报错,不会回退到 `.env` 或代码默认值。
- 如果要引用环境变量,请在 YAML 显式写 `${ENV_VAR}` - 如果要引用环境变量,请在 YAML 显式写 `${ENV_VAR}`
- `siliconflow` 独立 section 已移除;请在 `agent.llm / agent.tts / agent.asr` 内通过 `provider``api_key``api_url``model` 配置。 - `siliconflow` 独立 section 已移除;请在 `agent.llm / agent.tts / agent.asr` 内通过 `provider``api_key``api_url``model` 配置。
- 现在支持在 Agent YAML 中配置 `agent.tools`(列表),用于声明运行时可调用工具。
- 工具配置示例见 `config/agents/tools.yaml`
## Backend Integration ## Backend Integration

View File

@@ -95,6 +95,7 @@ _AGENT_SETTING_KEYS = {
"duplex_system_prompt", "duplex_system_prompt",
"barge_in_min_duration_ms", "barge_in_min_duration_ms",
"barge_in_silence_tolerance_ms", "barge_in_silence_tolerance_ms",
"tools",
} }
_BASE_REQUIRED_AGENT_SETTING_KEYS = { _BASE_REQUIRED_AGENT_SETTING_KEYS = {
"vad_type", "vad_type",
@@ -239,6 +240,11 @@ def _normalize_agent_overrides(raw: Dict[str, Any]) -> Dict[str, Any]:
"Section 'siliconflow' is no longer supported. " "Section 'siliconflow' is no longer supported. "
"Move provider-specific fields into agent.llm / agent.asr / agent.tts." "Move provider-specific fields into agent.llm / agent.asr / agent.tts."
) )
if key == "tools":
if not isinstance(value, list):
raise ValueError("Agent config key 'tools' must be a list")
normalized["tools"] = value
continue
section_map = _AGENT_SECTION_KEY_MAP.get(key) section_map = _AGENT_SECTION_KEY_MAP.get(key)
if section_map is None: if section_map is None:
normalized[key] = value normalized[key] = value
@@ -444,6 +450,10 @@ class Settings(BaseSettings):
description="How much silence (ms) is tolerated during potential barge-in before reset" description="How much silence (ms) is tolerated during potential barge-in before reset"
) )
# Optional tool declarations from agent YAML.
# Supports OpenAI function schema style entries and/or shorthand string names.
tools: List[Any] = Field(default_factory=list, description="Default tool definitions for runtime")
# Logging # Logging
log_level: str = Field(default="INFO", description="Logging level") log_level: str = Field(default="INFO", description="Logging level")
log_format: str = Field(default="json", description="Log format (json or text)") log_format: str = Field(default="json", description="Log format (json or text)")

73
config/agents/tools.yaml Normal file
View File

@@ -0,0 +1,73 @@
# Agent behavior configuration with tool declarations.
# This profile is an example only.
# NOTE(review): nesting below was reconstructed from a flattened diff view —
# verify indentation against the committed file before relying on it.
agent:
  vad:
    type: silero
    model_path: data/vad/silero_vad.onnx
    threshold: 0.5
    min_speech_duration_ms: 100
    eou_threshold_ms: 800
  llm:
    # provider: openai | openai_compatible | siliconflow
    provider: openai_compatible
    model: deepseek-v3
    temperature: 0.7
    api_key: your_llm_api_key
    api_url: https://api.qnaigc.com/v1
  tts:
    # provider: edge | openai_compatible | siliconflow
    provider: openai_compatible
    api_key: your_tts_api_key
    api_url: https://api.siliconflow.cn/v1/audio/speech
    model: FunAudioLLM/CosyVoice2-0.5B
    voice: anna
    speed: 1.0
  asr:
    # provider: buffered | openai_compatible | siliconflow
    provider: openai_compatible
    api_key: your_asr_api_key
    api_url: https://api.siliconflow.cn/v1/audio/transcriptions
    model: FunAudioLLM/SenseVoiceSmall
    interim_interval_ms: 500
    min_audio_ms: 300
    start_min_speech_ms: 160
    pre_speech_ms: 240
    final_tail_ms: 120
  duplex:
    enabled: true
    system_prompt: You are a helpful voice assistant with tool-calling support.
    barge_in:
      min_duration_ms: 200
      silence_tolerance_ms: 60
  # Tool declarations consumed by the engine at startup.
  # - String form enables built-in/default tool schema when available.
  # - Object form provides OpenAI function schema + executor hint.
  tools:
    - current_time
    - calculator
    - name: weather
      description: Get weather by city name.
      parameters:
        type: object
        properties:
          city:
            type: string
            description: City name, for example "San Francisco".
        required: [city]
      # executor: server → the backend runs this tool itself.
      executor: server
    - name: open_map
      description: Open map app on the client device.
      parameters:
        type: object
        properties:
          query:
            type: string
        required: [query]
      # executor: client → the tool call is forwarded to the connected device.
      executor: client

View File

@@ -206,7 +206,8 @@ class DuplexPipeline:
self._runtime_barge_in_min_duration_ms: Optional[int] = None self._runtime_barge_in_min_duration_ms: Optional[int] = None
self._runtime_knowledge: Dict[str, Any] = {} self._runtime_knowledge: Dict[str, Any] = {}
self._runtime_knowledge_base_id: Optional[str] = None self._runtime_knowledge_base_id: Optional[str] = None
self._runtime_tools: List[Any] = [] raw_default_tools = settings.tools if isinstance(settings.tools, list) else []
self._runtime_tools: List[Any] = list(raw_default_tools)
self._runtime_tool_executor: Dict[str, str] = {} self._runtime_tool_executor: Dict[str, str] = {}
self._pending_tool_waiters: Dict[str, asyncio.Future] = {} self._pending_tool_waiters: Dict[str, asyncio.Future] = {}
self._early_tool_results: Dict[str, Dict[str, Any]] = {} self._early_tool_results: Dict[str, Dict[str, Any]] = {}
@@ -227,6 +228,8 @@ class DuplexPipeline:
self._pending_llm_delta: str = "" self._pending_llm_delta: str = ""
self._last_llm_delta_emit_ms: float = 0.0 self._last_llm_delta_emit_ms: float = 0.0
self._runtime_tool_executor = self._resolved_tool_executor_map()
if self._server_tool_executor is None: if self._server_tool_executor is None:
if self._tool_resource_resolver: if self._tool_resource_resolver:
async def _executor(call: Dict[str, Any]) -> Dict[str, Any]: async def _executor(call: Dict[str, Any]) -> Dict[str, Any]:
@@ -369,7 +372,7 @@ class DuplexPipeline:
}, },
}, },
"tools": { "tools": {
"allowlist": sorted(self._runtime_tool_executor.keys()), "allowlist": self._resolved_tool_allowlist(),
}, },
"tracks": { "tracks": {
"audio_in": self.track_audio_in, "audio_in": self.track_audio_in,
@@ -1165,6 +1168,23 @@ class DuplexPipeline:
result[name] = executor result[name] = executor
return result return result
def _resolved_tool_allowlist(self) -> List[str]:
names: set[str] = set()
for item in self._runtime_tools:
if isinstance(item, str):
name = item.strip()
if name:
names.add(name)
continue
if not isinstance(item, dict):
continue
fn = item.get("function")
if isinstance(fn, dict) and fn.get("name"):
names.add(str(fn.get("name")).strip())
elif item.get("name"):
names.add(str(item.get("name")).strip())
return sorted([name for name in names if name])
def _tool_name(self, tool_call: Dict[str, Any]) -> str: def _tool_name(self, tool_call: Dict[str, Any]) -> str:
fn = tool_call.get("function") fn = tool_call.get("function")
if isinstance(fn, dict): if isinstance(fn, dict):

View File

@@ -202,3 +202,51 @@ def test_agent_yaml_missing_env_reference_fails(monkeypatch, tmp_path):
with pytest.raises(ValueError, match="Missing environment variable"): with pytest.raises(ValueError, match="Missing environment variable"):
load_settings(argv=["--agent-config", str(file_path)]) load_settings(argv=["--agent-config", str(file_path)])
def test_agent_yaml_tools_list_is_loaded(monkeypatch, tmp_path):
    """Tools declared as a YAML list (string and dict entries) are kept verbatim on settings."""
    monkeypatch.chdir(tmp_path)
    file_path = tmp_path / "tools-agent.yaml"
    # NOTE(review): the YAML indentation inside this literal was reconstructed
    # from a flattened diff view — confirm against the committed test file.
    _write_yaml(
        file_path,
        _full_agent_yaml()
        + """
tools:
  - current_time
  - name: weather
    description: Get weather by city.
    parameters:
      type: object
      properties:
        city:
          type: string
      required: [city]
    executor: server
""",
    )
    settings = load_settings(argv=["--agent-config", str(file_path)])
    # Both shorthand strings and full schema dicts pass through untouched.
    assert isinstance(settings.tools, list)
    assert settings.tools[0] == "current_time"
    assert settings.tools[1]["name"] == "weather"
    assert settings.tools[1]["executor"] == "server"
def test_agent_yaml_tools_must_be_list(monkeypatch, tmp_path):
    """A mapping under 'tools' (instead of a list) must fail fast with a clear error."""
    monkeypatch.chdir(tmp_path)
    file_path = tmp_path / "bad-tools-agent.yaml"
    # NOTE(review): YAML indentation in this literal reconstructed from a
    # flattened diff view — confirm against the committed test file.
    _write_yaml(
        file_path,
        _full_agent_yaml()
        + """
tools:
  weather:
    executor: server
""",
    )
    # Strict-mode agent config: a non-list 'tools' raises instead of coercing.
    with pytest.raises(ValueError, match="Agent config key 'tools' must be a list"):
        load_settings(argv=["--agent-config", str(file_path)])

View File

@@ -92,6 +92,34 @@ def _build_pipeline(monkeypatch, llm_rounds: List[List[LLMStreamEvent]]) -> tupl
return pipeline, events return pipeline, events
def test_pipeline_uses_default_tools_from_settings(monkeypatch):
    """Pipeline seeds its runtime tool list (and derived allowlist/schemas) from settings.tools."""
    monkeypatch.setattr(
        "core.duplex_pipeline.settings.tools",
        [
            # Shorthand built-in tool by name.
            "current_time",
            # Full OpenAI function-schema style declaration with executor hint.
            {
                "name": "weather",
                "description": "Get weather by city",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
                "executor": "server",
            },
        ],
    )
    pipeline, _events = _build_pipeline(monkeypatch, [[LLMStreamEvent(type="done")]])
    cfg = pipeline.resolved_runtime_config()
    # Allowlist is derived from the configured tools and sorted alphabetically.
    assert cfg["tools"]["allowlist"] == ["current_time", "weather"]
    schemas = pipeline._resolved_tool_schemas()
    names = [s.get("function", {}).get("name") for s in schemas if isinstance(s, dict)]
    assert "current_time" in names
    assert "weather" in names
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_ws_message_parses_tool_call_results(): async def test_ws_message_parses_tool_call_results():
msg = parse_client_message( msg = parse_client_message(