diff --git a/.env.example b/.env.example index c812b48..0dd0cd0 100644 --- a/.env.example +++ b/.env.example @@ -11,9 +11,16 @@ PORT=8000 # EXTERNAL_IP=1.2.3.4 # Backend bridge (optional) +# BACKEND_MODE=auto|http|disabled +BACKEND_MODE=auto BACKEND_URL=http://127.0.0.1:8100 BACKEND_TIMEOUT_SEC=10 +HISTORY_ENABLED=true HISTORY_DEFAULT_USER_ID=1 +HISTORY_QUEUE_MAX_SIZE=256 +HISTORY_RETRY_MAX_ATTEMPTS=2 +HISTORY_RETRY_BACKOFF_SEC=0.2 +HISTORY_FINALIZE_DRAIN_TIMEOUT_SEC=1.5 # Audio SAMPLE_RATE=16000 diff --git a/README.md b/README.md index 62c972e..3353270 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,8 @@ Agent 配置路径优先级 - Agent 相关配置是严格模式:YAML 缺少必须项会直接报错,不会回退到 `.env` 或代码默认值。 - 如果要引用环境变量,请在 YAML 显式写 `${ENV_VAR}`。 - `siliconflow` 独立 section 已移除;请在 `agent.llm / agent.tts / agent.asr` 内通过 `provider`、`api_key`、`api_url`、`model` 配置。 +- 现在支持在 Agent YAML 中配置 `agent.tools`(列表),用于声明运行时可调用工具。 +- 工具配置示例见 `config/agents/tools.yaml`。 ## Backend Integration diff --git a/app/config.py b/app/config.py index 1f6a2ea..2d1a680 100644 --- a/app/config.py +++ b/app/config.py @@ -95,6 +95,7 @@ _AGENT_SETTING_KEYS = { "duplex_system_prompt", "barge_in_min_duration_ms", "barge_in_silence_tolerance_ms", + "tools", } _BASE_REQUIRED_AGENT_SETTING_KEYS = { "vad_type", @@ -239,6 +240,11 @@ def _normalize_agent_overrides(raw: Dict[str, Any]) -> Dict[str, Any]: "Section 'siliconflow' is no longer supported. " "Move provider-specific fields into agent.llm / agent.asr / agent.tts." ) + if key == "tools": + if not isinstance(value, list): + raise ValueError("Agent config key 'tools' must be a list") + normalized["tools"] = value + continue section_map = _AGENT_SECTION_KEY_MAP.get(key) if section_map is None: normalized[key] = value @@ -444,6 +450,10 @@ class Settings(BaseSettings): description="How much silence (ms) is tolerated during potential barge-in before reset" ) + # Optional tool declarations from agent YAML. + # Supports OpenAI function schema style entries and/or shorthand string names. 
+ tools: List[Any] = Field(default_factory=list, description="Default tool definitions for runtime") + # Logging log_level: str = Field(default="INFO", description="Logging level") log_format: str = Field(default="json", description="Log format (json or text)") diff --git a/config/agents/tools.yaml b/config/agents/tools.yaml new file mode 100644 index 0000000..9734bff --- /dev/null +++ b/config/agents/tools.yaml @@ -0,0 +1,73 @@ +# Agent behavior configuration with tool declarations. +# This profile is an example only. + +agent: + vad: + type: silero + model_path: data/vad/silero_vad.onnx + threshold: 0.5 + min_speech_duration_ms: 100 + eou_threshold_ms: 800 + + llm: + # provider: openai | openai_compatible | siliconflow + provider: openai_compatible + model: deepseek-v3 + temperature: 0.7 + api_key: your_llm_api_key + api_url: https://api.qnaigc.com/v1 + + tts: + # provider: edge | openai_compatible | siliconflow + provider: openai_compatible + api_key: your_tts_api_key + api_url: https://api.siliconflow.cn/v1/audio/speech + model: FunAudioLLM/CosyVoice2-0.5B + voice: anna + speed: 1.0 + + asr: + # provider: buffered | openai_compatible | siliconflow + provider: openai_compatible + api_key: your_asr_api_key + api_url: https://api.siliconflow.cn/v1/audio/transcriptions + model: FunAudioLLM/SenseVoiceSmall + interim_interval_ms: 500 + min_audio_ms: 300 + start_min_speech_ms: 160 + pre_speech_ms: 240 + final_tail_ms: 120 + + duplex: + enabled: true + system_prompt: You are a helpful voice assistant with tool-calling support. + + barge_in: + min_duration_ms: 200 + silence_tolerance_ms: 60 + + # Tool declarations consumed by the engine at startup. + # - String form enables built-in/default tool schema when available. + # - Object form provides OpenAI function schema + executor hint. + tools: + - current_time + - calculator + - name: weather + description: Get weather by city name. 
def _resolved_tool_allowlist(self) -> List[str]:
    """Return the sorted, de-duplicated names of the configured runtime tools.

    Each entry of ``self._runtime_tools`` may be:

    * a string -- a shorthand tool name;
    * a dict with a nested ``function`` dict carrying ``name``
      (OpenAI function-schema style);
    * a dict carrying a top-level ``name``.

    Entries of any other type, and entries without a usable name, are
    ignored.

    Returns:
        Alphabetically sorted list of unique, whitespace-stripped tool names.
    """
    names: set[str] = set()
    for item in self._runtime_tools:
        if isinstance(item, str):
            candidates = [item]
        elif isinstance(item, dict):
            fn = item.get("function")
            nested_name = fn.get("name") if isinstance(fn, dict) else None
            # The nested name takes precedence; the top-level name is the
            # fallback. The fallback is decided on the *stripped* value so a
            # whitespace-only nested name cannot hide a valid top-level one.
            candidates = [nested_name, item.get("name")]
        else:
            continue

        for candidate in candidates:
            name = str(candidate).strip() if candidate else ""
            if name:
                names.add(name)
                break
    return sorted(names)
def test_pipeline_uses_default_tools_from_settings(monkeypatch):
    """Tools placed on ``settings.tools`` show up in a freshly built pipeline.

    Checks both the ``tools.allowlist`` entry of the resolved runtime config
    and the function names found in the resolved tool schemas.
    """
    weather_tool = {
        "name": "weather",
        "description": "Get weather by city",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
        "executor": "server",
    }
    monkeypatch.setattr(
        "core.duplex_pipeline.settings.tools",
        ["current_time", weather_tool],
    )
    pipeline, _ = _build_pipeline(monkeypatch, [[LLMStreamEvent(type="done")]])

    runtime_cfg = pipeline.resolved_runtime_config()
    assert runtime_cfg["tools"]["allowlist"] == ["current_time", "weather"]

    schema_names = []
    for schema in pipeline._resolved_tool_schemas():
        if isinstance(schema, dict):
            schema_names.append(schema.get("function", {}).get("name"))
    assert "current_time" in schema_names
    assert "weather" in schema_names