Support tool config in yaml

This commit is contained in:
Xin Wang
2026-02-25 17:49:58 +08:00
parent 08319a4cc7
commit da4a77eac7
7 changed files with 190 additions and 2 deletions

View File

@@ -11,9 +11,16 @@ PORT=8000
# EXTERNAL_IP=1.2.3.4 # EXTERNAL_IP=1.2.3.4
# Backend bridge (optional) # Backend bridge (optional)
# BACKEND_MODE=auto|http|disabled
BACKEND_MODE=auto
BACKEND_URL=http://127.0.0.1:8100 BACKEND_URL=http://127.0.0.1:8100
BACKEND_TIMEOUT_SEC=10 BACKEND_TIMEOUT_SEC=10
HISTORY_ENABLED=true
HISTORY_DEFAULT_USER_ID=1 HISTORY_DEFAULT_USER_ID=1
HISTORY_QUEUE_MAX_SIZE=256
HISTORY_RETRY_MAX_ATTEMPTS=2
HISTORY_RETRY_BACKOFF_SEC=0.2
HISTORY_FINALIZE_DRAIN_TIMEOUT_SEC=1.5
# Audio # Audio
SAMPLE_RATE=16000 SAMPLE_RATE=16000

View File

@@ -37,6 +37,8 @@ Agent 配置路径优先级
- Agent 相关配置是严格模式YAML 缺少必须项会直接报错,不会回退到 `.env` 或代码默认值。 - Agent 相关配置是严格模式YAML 缺少必须项会直接报错,不会回退到 `.env` 或代码默认值。
- 如果要引用环境变量,请在 YAML 显式写 `${ENV_VAR}` - 如果要引用环境变量,请在 YAML 显式写 `${ENV_VAR}`
- `siliconflow` 独立 section 已移除;请在 `agent.llm / agent.tts / agent.asr` 内通过 `provider``api_key``api_url``model` 配置。 - `siliconflow` 独立 section 已移除;请在 `agent.llm / agent.tts / agent.asr` 内通过 `provider``api_key``api_url``model` 配置。
- 现在支持在 Agent YAML 中配置 `agent.tools`(列表),用于声明运行时可调用工具。
- 工具配置示例见 `config/agents/tools.yaml`
## Backend Integration ## Backend Integration

View File

@@ -95,6 +95,7 @@ _AGENT_SETTING_KEYS = {
"duplex_system_prompt", "duplex_system_prompt",
"barge_in_min_duration_ms", "barge_in_min_duration_ms",
"barge_in_silence_tolerance_ms", "barge_in_silence_tolerance_ms",
"tools",
} }
_BASE_REQUIRED_AGENT_SETTING_KEYS = { _BASE_REQUIRED_AGENT_SETTING_KEYS = {
"vad_type", "vad_type",
@@ -239,6 +240,11 @@ def _normalize_agent_overrides(raw: Dict[str, Any]) -> Dict[str, Any]:
"Section 'siliconflow' is no longer supported. " "Section 'siliconflow' is no longer supported. "
"Move provider-specific fields into agent.llm / agent.asr / agent.tts." "Move provider-specific fields into agent.llm / agent.asr / agent.tts."
) )
if key == "tools":
if not isinstance(value, list):
raise ValueError("Agent config key 'tools' must be a list")
normalized["tools"] = value
continue
section_map = _AGENT_SECTION_KEY_MAP.get(key) section_map = _AGENT_SECTION_KEY_MAP.get(key)
if section_map is None: if section_map is None:
normalized[key] = value normalized[key] = value
@@ -444,6 +450,10 @@ class Settings(BaseSettings):
description="How much silence (ms) is tolerated during potential barge-in before reset" description="How much silence (ms) is tolerated during potential barge-in before reset"
) )
# Optional tool declarations from agent YAML.
# Supports OpenAI function schema style entries and/or shorthand string names.
tools: List[Any] = Field(default_factory=list, description="Default tool definitions for runtime")
# Logging # Logging
log_level: str = Field(default="INFO", description="Logging level") log_level: str = Field(default="INFO", description="Logging level")
log_format: str = Field(default="json", description="Log format (json or text)") log_format: str = Field(default="json", description="Log format (json or text)")

73
config/agents/tools.yaml Normal file
View File

@@ -0,0 +1,73 @@
# Agent behavior configuration with tool declarations.
# This profile is an example only.
# NOTE(review): nesting below was reconstructed from a flattened diff view —
# verify indentation against the committed file before relying on it.
agent:
  vad:
    type: silero
    model_path: data/vad/silero_vad.onnx
    threshold: 0.5
    min_speech_duration_ms: 100
    eou_threshold_ms: 800
  llm:
    # provider: openai | openai_compatible | siliconflow
    provider: openai_compatible
    model: deepseek-v3
    temperature: 0.7
    api_key: your_llm_api_key
    api_url: https://api.qnaigc.com/v1
  tts:
    # provider: edge | openai_compatible | siliconflow
    provider: openai_compatible
    api_key: your_tts_api_key
    api_url: https://api.siliconflow.cn/v1/audio/speech
    model: FunAudioLLM/CosyVoice2-0.5B
    voice: anna
    speed: 1.0
  asr:
    # provider: buffered | openai_compatible | siliconflow
    provider: openai_compatible
    api_key: your_asr_api_key
    api_url: https://api.siliconflow.cn/v1/audio/transcriptions
    model: FunAudioLLM/SenseVoiceSmall
    interim_interval_ms: 500
    min_audio_ms: 300
    start_min_speech_ms: 160
    pre_speech_ms: 240
    final_tail_ms: 120
  duplex:
    enabled: true
    system_prompt: You are a helpful voice assistant with tool-calling support.
    barge_in:
      min_duration_ms: 200
      silence_tolerance_ms: 60
  # Tool declarations consumed by the engine at startup.
  # - String form enables built-in/default tool schema when available.
  # - Object form provides OpenAI function schema + executor hint.
  tools:
    - current_time
    - calculator
    - name: weather
      description: Get weather by city name.
      parameters:
        type: object
        properties:
          city:
            type: string
            description: City name, for example "San Francisco".
        required: [city]
      # executor: server → the backend runs this tool itself.
      executor: server
    - name: open_map
      description: Open map app on the client device.
      parameters:
        type: object
        properties:
          query:
            type: string
        required: [query]
      # executor: client → the tool call is forwarded to the connected device.
      executor: client

View File

@@ -206,7 +206,8 @@ class DuplexPipeline:
self._runtime_barge_in_min_duration_ms: Optional[int] = None self._runtime_barge_in_min_duration_ms: Optional[int] = None
self._runtime_knowledge: Dict[str, Any] = {} self._runtime_knowledge: Dict[str, Any] = {}
self._runtime_knowledge_base_id: Optional[str] = None self._runtime_knowledge_base_id: Optional[str] = None
self._runtime_tools: List[Any] = [] raw_default_tools = settings.tools if isinstance(settings.tools, list) else []
self._runtime_tools: List[Any] = list(raw_default_tools)
self._runtime_tool_executor: Dict[str, str] = {} self._runtime_tool_executor: Dict[str, str] = {}
self._pending_tool_waiters: Dict[str, asyncio.Future] = {} self._pending_tool_waiters: Dict[str, asyncio.Future] = {}
self._early_tool_results: Dict[str, Dict[str, Any]] = {} self._early_tool_results: Dict[str, Dict[str, Any]] = {}
@@ -227,6 +228,8 @@ class DuplexPipeline:
self._pending_llm_delta: str = "" self._pending_llm_delta: str = ""
self._last_llm_delta_emit_ms: float = 0.0 self._last_llm_delta_emit_ms: float = 0.0
self._runtime_tool_executor = self._resolved_tool_executor_map()
if self._server_tool_executor is None: if self._server_tool_executor is None:
if self._tool_resource_resolver: if self._tool_resource_resolver:
async def _executor(call: Dict[str, Any]) -> Dict[str, Any]: async def _executor(call: Dict[str, Any]) -> Dict[str, Any]:
@@ -369,7 +372,7 @@ class DuplexPipeline:
}, },
}, },
"tools": { "tools": {
"allowlist": sorted(self._runtime_tool_executor.keys()), "allowlist": self._resolved_tool_allowlist(),
}, },
"tracks": { "tracks": {
"audio_in": self.track_audio_in, "audio_in": self.track_audio_in,
@@ -1165,6 +1168,23 @@ class DuplexPipeline:
result[name] = executor result[name] = executor
return result return result
def _resolved_tool_allowlist(self) -> List[str]:
names: set[str] = set()
for item in self._runtime_tools:
if isinstance(item, str):
name = item.strip()
if name:
names.add(name)
continue
if not isinstance(item, dict):
continue
fn = item.get("function")
if isinstance(fn, dict) and fn.get("name"):
names.add(str(fn.get("name")).strip())
elif item.get("name"):
names.add(str(item.get("name")).strip())
return sorted([name for name in names if name])
def _tool_name(self, tool_call: Dict[str, Any]) -> str: def _tool_name(self, tool_call: Dict[str, Any]) -> str:
fn = tool_call.get("function") fn = tool_call.get("function")
if isinstance(fn, dict): if isinstance(fn, dict):

View File

@@ -202,3 +202,51 @@ def test_agent_yaml_missing_env_reference_fails(monkeypatch, tmp_path):
with pytest.raises(ValueError, match="Missing environment variable"): with pytest.raises(ValueError, match="Missing environment variable"):
load_settings(argv=["--agent-config", str(file_path)]) load_settings(argv=["--agent-config", str(file_path)])
def test_agent_yaml_tools_list_is_loaded(monkeypatch, tmp_path):
    """Tools declared as a YAML list (string and dict entries) are kept verbatim on settings."""
    monkeypatch.chdir(tmp_path)
    file_path = tmp_path / "tools-agent.yaml"
    # NOTE(review): the YAML indentation inside this literal was reconstructed
    # from a flattened diff view — confirm against the committed test file.
    _write_yaml(
        file_path,
        _full_agent_yaml()
        + """
tools:
  - current_time
  - name: weather
    description: Get weather by city.
    parameters:
      type: object
      properties:
        city:
          type: string
      required: [city]
    executor: server
""",
    )
    settings = load_settings(argv=["--agent-config", str(file_path)])
    # Both shorthand strings and full schema dicts pass through untouched.
    assert isinstance(settings.tools, list)
    assert settings.tools[0] == "current_time"
    assert settings.tools[1]["name"] == "weather"
    assert settings.tools[1]["executor"] == "server"
def test_agent_yaml_tools_must_be_list(monkeypatch, tmp_path):
    """A mapping under 'tools' (instead of a list) must fail fast with a clear error."""
    monkeypatch.chdir(tmp_path)
    file_path = tmp_path / "bad-tools-agent.yaml"
    # NOTE(review): YAML indentation in this literal reconstructed from a
    # flattened diff view — confirm against the committed test file.
    _write_yaml(
        file_path,
        _full_agent_yaml()
        + """
tools:
  weather:
    executor: server
""",
    )
    # Strict-mode agent config: a non-list 'tools' raises instead of coercing.
    with pytest.raises(ValueError, match="Agent config key 'tools' must be a list"):
        load_settings(argv=["--agent-config", str(file_path)])

View File

@@ -92,6 +92,34 @@ def _build_pipeline(monkeypatch, llm_rounds: List[List[LLMStreamEvent]]) -> tupl
return pipeline, events return pipeline, events
def test_pipeline_uses_default_tools_from_settings(monkeypatch):
    """Pipeline seeds its runtime tool list (and derived allowlist/schemas) from settings.tools."""
    monkeypatch.setattr(
        "core.duplex_pipeline.settings.tools",
        [
            # Shorthand built-in tool by name.
            "current_time",
            # Full OpenAI function-schema style declaration with executor hint.
            {
                "name": "weather",
                "description": "Get weather by city",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
                "executor": "server",
            },
        ],
    )
    pipeline, _events = _build_pipeline(monkeypatch, [[LLMStreamEvent(type="done")]])
    cfg = pipeline.resolved_runtime_config()
    # Allowlist is derived from the configured tools and sorted alphabetically.
    assert cfg["tools"]["allowlist"] == ["current_time", "weather"]
    schemas = pipeline._resolved_tool_schemas()
    names = [s.get("function", {}).get("name") for s in schemas if isinstance(s, dict)]
    assert "current_time" in names
    assert "weather" in names
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_ws_message_parses_tool_call_results(): async def test_ws_message_parses_tool_call_results():
msg = parse_client_message( msg = parse_client_message(