From 935f2fbd1fcf35785fd43bcb52402bd4705c00a5 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Thu, 5 Mar 2026 21:24:15 +0800 Subject: [PATCH] Refactor assistant configuration management and update documentation - Removed legacy agent profile settings from the .env.example and README, streamlining the configuration process. - Introduced a new local YAML configuration adapter for assistant settings, allowing for easier management of assistant profiles. - Updated backend integration documentation to clarify the behavior of assistant config sourcing based on backend URL settings. - Adjusted various service implementations to directly utilize API keys from the new configuration structure. - Enhanced test coverage for the new local YAML adapter and its integration with backend services. --- engine/.env.example | 18 +- engine/README.md | 50 +-- engine/app/backend_adapters.py | 312 +++++++++++++- engine/app/config.py | 387 +----------------- engine/app/main.py | 10 +- engine/core/duplex_pipeline.py | 6 +- engine/docs/backend_integration.md | 7 + engine/examples/wav_client.py | 4 +- .../scripts/generate_test_audio/.env.example | 1 + engine/services/dashscope_tts.py | 2 +- engine/services/llm.py | 4 +- engine/services/openai_compatible_asr.py | 2 +- engine/services/openai_compatible_tts.py | 4 +- engine/services/realtime.py | 2 - engine/tests/test_agent_config.py | 300 +------------- engine/tests/test_backend_adapters.py | 214 +++++++++- examples/README.md | 1 + 17 files changed, 585 insertions(+), 739 deletions(-) create mode 100644 engine/scripts/generate_test_audio/.env.example create mode 100644 examples/README.md diff --git a/engine/.env.example b/engine/.env.example index 4007aa0..8a87354 100644 --- a/engine/.env.example +++ b/engine/.env.example @@ -30,21 +30,9 @@ CHUNK_SIZE_MS=20 DEFAULT_CODEC=pcm MAX_AUDIO_BUFFER_SECONDS=30 -# Agent profile selection (optional fallback when CLI args are not used) -# Prefer CLI: -# python -m app.main --agent-config 
config/agents/default.yaml -# python -m app.main --agent-profile default -# AGENT_CONFIG_PATH=config/agents/default.yaml -# AGENT_PROFILE=default -AGENT_CONFIG_DIR=config/agents - -# Optional: provider credentials referenced from YAML, e.g. ${LLM_API_KEY} -# LLM_API_KEY=your_llm_api_key_here -# LLM_API_URL=https://api.openai.com/v1 -# TTS_API_KEY=your_tts_api_key_here -# TTS_API_URL=https://api.example.com/v1/audio/speech -# ASR_API_KEY=your_asr_api_key_here -# ASR_API_URL=https://api.example.com/v1/audio/transcriptions +# Local assistant/agent YAML directory. In local mode the runtime resolves: +# ASSISTANT_LOCAL_CONFIG_DIR/.yaml +ASSISTANT_LOCAL_CONFIG_DIR=engine/config/agents # Logging LOG_LEVEL=INFO diff --git a/engine/README.md b/engine/README.md index 9d0949b..5018c39 100644 --- a/engine/README.md +++ b/engine/README.md @@ -1,6 +1,6 @@ -# py-active-call-cc +# Realtime Agent Studio Engine -Python Active-Call: real-time audio streaming with WebSocket and WebRTC. +This repo contains a Python 3.11+ codebase for building low-latency realtime human-agent interaction pipelines (capture, stream, and process audio) using WebSockets or WebRTC. This repo contains a Python 3.11+ codebase for building low-latency voice pipelines (capture, stream, and process audio) using WebRTC and WebSockets. @@ -14,35 +14,11 @@ It is currently in an early, experimental stage. uvicorn app.main:app --reload --host 0.0.0.0 --port 8000 ``` -使用 agent profile(推荐) - -``` -python -m app.main --agent-profile default -``` - -使用指定 YAML - -``` -python -m app.main --agent-config config/agents/default.yaml -``` - -Agent 配置路径优先级 -1. `--agent-config` -2. `--agent-profile`(映射到 `config/agents/.yaml`) -3. `AGENT_CONFIG_PATH` -4. `AGENT_PROFILE` -5. 
`config/agents/default.yaml`(若存在) - 说明 -- Agent 相关配置是严格模式:YAML 缺少必须项会直接报错,不会回退到 `.env` 或代码默认值。 -- 如果要引用环境变量,请在 YAML 显式写 `${ENV_VAR}`。 -- `siliconflow` 独立 section 已移除;请在 `agent.llm / agent.tts / agent.asr` 内通过 `provider`、`api_key`、`api_url`、`model` 配置。 -- `agent.tts.provider` 现支持 `dashscope`(Realtime 协议,非 OpenAI-compatible);默认 URL 为 `wss://dashscope.aliyuncs.com/api-ws/v1/realtime`,默认模型为 `qwen3-tts-flash-realtime`。 -- `agent.tts.dashscope_mode`(兼容旧写法 `agent.tts.mode`)支持 `commit | server_commit`,且仅在 `provider=dashscope` 时生效: - - `commit`:Engine 先按句切分,再逐句提交给 DashScope。 - - `server_commit`:Engine 不再逐句切分,由 DashScope 对整段文本自行切分。 -- 现在支持在 Agent YAML 中配置 `agent.tools`(列表),用于声明运行时可调用工具。 -- 工具配置示例见 `config/agents/tools.yaml`。 +- 启动阶段不再通过参数加载 Agent YAML。 +- 会话阶段统一按 `assistant_id` 拉取运行时配置: + - 有 `BACKEND_URL`:从 backend API 获取。 + - 无 `BACKEND_URL`(或 `BACKEND_MODE=disabled`):从 `ASSISTANT_LOCAL_CONFIG_DIR/.yaml` 获取。 ## Backend Integration @@ -50,6 +26,7 @@ Engine runtime now supports adapter-based backend integration: - `BACKEND_MODE=auto|http|disabled` - `BACKEND_URL` + `BACKEND_TIMEOUT_SEC` +- `ASSISTANT_LOCAL_CONFIG_DIR` (default `engine/config/agents`) - `HISTORY_ENABLED=true|false` Behavior: @@ -58,6 +35,16 @@ Behavior: - `http`: force HTTP backend; falls back to engine-only mode when URL is missing. - `disabled`: force engine-only mode (no backend calls). +Assistant config source behavior: + +- If `BACKEND_URL` is configured and backend mode is enabled, assistant config is loaded from backend API. +- If `BACKEND_URL` is empty (or backend mode is disabled), assistant config is loaded from local YAML. + +Local assistant YAML example: + +- File path: `engine/config/agents/.yaml` +- Runtime still requires WebSocket query param `assistant_id`; it must match the local file name. 
+ History write path is now asynchronous and buffered per session: - `HISTORY_QUEUE_MAX_SIZE` @@ -84,3 +71,6 @@ python mic_client.py `/ws` uses a strict `v1` JSON control protocol with binary PCM audio frames. See `docs/ws_v1_schema.md`. + +# Reference +* [active-call](https://github.com/restsend/active-call) diff --git a/engine/app/backend_adapters.py b/engine/app/backend_adapters.py index 6ff2716..087f744 100644 --- a/engine/app/backend_adapters.py +++ b/engine/app/backend_adapters.py @@ -2,6 +2,8 @@ from __future__ import annotations +import re +from pathlib import Path from typing import Any, Dict, List, Optional import aiohttp @@ -9,6 +11,18 @@ from loguru import logger from app.config import settings +try: + import yaml +except ImportError: # pragma: no cover - validated when local YAML source is enabled + yaml = None + + +_ASSISTANT_ID_PATTERN = re.compile(r"^[A-Za-z0-9][A-Za-z0-9_.-]{0,127}$") + + +def _assistant_error(code: str, assistant_id: str) -> Dict[str, Any]: + return {"__error_code": code, "assistantId": str(assistant_id or "")} + class NullBackendAdapter: """No-op adapter for engine-only runtime without backend dependencies.""" @@ -128,6 +142,283 @@ class HistoryDisabledBackendAdapter: return await self._delegate.fetch_tool_resource(tool_id) +class LocalYamlAssistantConfigAdapter(NullBackendAdapter): + """Load assistant runtime config from local YAML files.""" + + def __init__(self, config_dir: str): + self._config_dir = self._resolve_base_dir(config_dir) + + @staticmethod + def _resolve_base_dir(config_dir: str) -> Path: + raw = Path(str(config_dir or "").strip() or "engine/config/agents") + if raw.is_absolute(): + return raw.resolve() + + cwd_candidate = (Path.cwd() / raw).resolve() + if cwd_candidate.exists(): + return cwd_candidate + + engine_dir = Path(__file__).resolve().parent.parent + engine_candidate = (engine_dir / raw).resolve() + if engine_candidate.exists(): + return engine_candidate + + parts = raw.parts + if parts and parts[0] == 
"engine" and len(parts) > 1: + trimmed_candidate = (engine_dir / Path(*parts[1:])).resolve() + if trimmed_candidate.exists(): + return trimmed_candidate + + return cwd_candidate + + def _resolve_config_file(self, assistant_id: str) -> Optional[Path]: + normalized = str(assistant_id or "").strip() + if not _ASSISTANT_ID_PATTERN.match(normalized): + return None + + yaml_path = self._config_dir / f"{normalized}.yaml" + yml_path = self._config_dir / f"{normalized}.yml" + if yaml_path.exists(): + return yaml_path + if yml_path.exists(): + return yml_path + return None + + @staticmethod + def _as_str(value: Any) -> Optional[str]: + if value is None: + return None + text = str(value).strip() + return text or None + + @classmethod + def _translate_agent_schema(cls, assistant_id: str, payload: Dict[str, Any]) -> Optional[Dict[str, Any]]: + """Translate legacy `agent:` YAML schema into runtime assistant metadata.""" + agent = payload.get("agent") + if not isinstance(agent, dict): + return None + + runtime: Dict[str, Any] = { + "assistantId": str(assistant_id), + "services": {}, + } + + llm = agent.get("llm") + if isinstance(llm, dict): + llm_runtime: Dict[str, Any] = {} + if cls._as_str(llm.get("provider")): + llm_runtime["provider"] = cls._as_str(llm.get("provider")) + if cls._as_str(llm.get("model")): + llm_runtime["model"] = cls._as_str(llm.get("model")) + if cls._as_str(llm.get("api_key")): + llm_runtime["apiKey"] = cls._as_str(llm.get("api_key")) + if cls._as_str(llm.get("api_url")): + llm_runtime["baseUrl"] = cls._as_str(llm.get("api_url")) + if llm_runtime: + runtime["services"]["llm"] = llm_runtime + + tts = agent.get("tts") + if isinstance(tts, dict): + tts_runtime: Dict[str, Any] = {} + if cls._as_str(tts.get("provider")): + tts_runtime["provider"] = cls._as_str(tts.get("provider")) + if cls._as_str(tts.get("model")): + tts_runtime["model"] = cls._as_str(tts.get("model")) + if cls._as_str(tts.get("api_key")): + tts_runtime["apiKey"] = 
cls._as_str(tts.get("api_key")) + if cls._as_str(tts.get("api_url")): + tts_runtime["baseUrl"] = cls._as_str(tts.get("api_url")) + if cls._as_str(tts.get("voice")): + tts_runtime["voice"] = cls._as_str(tts.get("voice")) + if tts.get("speed") is not None: + tts_runtime["speed"] = tts.get("speed") + dashscope_mode = cls._as_str(tts.get("dashscope_mode")) or cls._as_str(tts.get("mode")) + if dashscope_mode: + tts_runtime["mode"] = dashscope_mode + if tts_runtime: + runtime["services"]["tts"] = tts_runtime + + asr = agent.get("asr") + if isinstance(asr, dict): + asr_runtime: Dict[str, Any] = {} + if cls._as_str(asr.get("provider")): + asr_runtime["provider"] = cls._as_str(asr.get("provider")) + if cls._as_str(asr.get("model")): + asr_runtime["model"] = cls._as_str(asr.get("model")) + if cls._as_str(asr.get("api_key")): + asr_runtime["apiKey"] = cls._as_str(asr.get("api_key")) + if cls._as_str(asr.get("api_url")): + asr_runtime["baseUrl"] = cls._as_str(asr.get("api_url")) + if asr.get("interim_interval_ms") is not None: + asr_runtime["interimIntervalMs"] = asr.get("interim_interval_ms") + if asr.get("min_audio_ms") is not None: + asr_runtime["minAudioMs"] = asr.get("min_audio_ms") + if asr_runtime: + runtime["services"]["asr"] = asr_runtime + + duplex = agent.get("duplex") + if isinstance(duplex, dict): + if cls._as_str(duplex.get("system_prompt")): + runtime["systemPrompt"] = cls._as_str(duplex.get("system_prompt")) + if duplex.get("greeting") is not None: + runtime["greeting"] = duplex.get("greeting") + + barge_in = agent.get("barge_in") + if isinstance(barge_in, dict): + runtime["bargeIn"] = {} + if barge_in.get("min_duration_ms") is not None: + runtime["bargeIn"]["minDurationMs"] = barge_in.get("min_duration_ms") + if barge_in.get("silence_tolerance_ms") is not None: + runtime["bargeIn"]["silenceToleranceMs"] = barge_in.get("silence_tolerance_ms") + if not runtime["bargeIn"]: + runtime.pop("bargeIn", None) + + if isinstance(agent.get("tools"), list): + 
runtime["tools"] = agent.get("tools") + + if not runtime.get("services"): + runtime.pop("services", None) + return runtime + + async def fetch_assistant_config(self, assistant_id: str) -> Optional[Dict[str, Any]]: + config_file = self._resolve_config_file(assistant_id) + if config_file is None: + return _assistant_error("assistant.not_found", assistant_id) + + if yaml is None: + logger.warning( + "Local assistant config requested but PyYAML is unavailable (assistant_id={})", + assistant_id, + ) + return _assistant_error("assistant.config_unavailable", assistant_id) + + try: + with config_file.open("r", encoding="utf-8") as handle: + payload = yaml.safe_load(handle) or {} + except Exception as exc: + logger.warning( + "Failed to read local assistant config {} (assistant_id={}): {}", + config_file, + assistant_id, + exc, + ) + return _assistant_error("assistant.config_unavailable", assistant_id) + + if not isinstance(payload, dict): + logger.warning( + "Local assistant config is not an object (assistant_id={}, file={})", + assistant_id, + config_file, + ) + return _assistant_error("assistant.config_unavailable", assistant_id) + + translated = self._translate_agent_schema(assistant_id, payload) + if translated is not None: + payload = translated + + # Accept either backend-like payload shape or a direct assistant metadata object. 
+ if isinstance(payload.get("assistant"), dict) or isinstance(payload.get("sessionStartMetadata"), dict): + normalized_payload = dict(payload) + else: + normalized_payload = {"assistant": dict(payload)} + + assistant_obj = normalized_payload.get("assistant") + if isinstance(assistant_obj, dict): + resolved_assistant_id = assistant_obj.get("assistantId") or assistant_obj.get("id") or assistant_id + assistant_obj["assistantId"] = str(resolved_assistant_id) + else: + normalized_payload["assistant"] = {"assistantId": str(assistant_id)} + + normalized_payload.setdefault("assistantId", str(assistant_id)) + normalized_payload.setdefault("configVersionId", f"local:{config_file.name}") + return normalized_payload + + +class AssistantConfigSourceAdapter: + """Route assistant config reads by backend availability without changing other APIs.""" + + def __init__( + self, + *, + delegate: HttpBackendAdapter | NullBackendAdapter | HistoryDisabledBackendAdapter, + local_delegate: LocalYamlAssistantConfigAdapter, + use_backend_assistant_config: bool, + ): + self._delegate = delegate + self._local_delegate = local_delegate + self._use_backend_assistant_config = bool(use_backend_assistant_config) + + async def fetch_assistant_config(self, assistant_id: str) -> Optional[Dict[str, Any]]: + if self._use_backend_assistant_config: + return await self._delegate.fetch_assistant_config(assistant_id) + return await self._local_delegate.fetch_assistant_config(assistant_id) + + async def create_call_record( + self, + *, + user_id: int, + assistant_id: Optional[str], + source: str = "debug", + ) -> Optional[str]: + return await self._delegate.create_call_record( + user_id=user_id, + assistant_id=assistant_id, + source=source, + ) + + async def add_transcript( + self, + *, + call_id: str, + turn_index: int, + speaker: str, + content: str, + start_ms: int, + end_ms: int, + confidence: Optional[float] = None, + duration_ms: Optional[int] = None, + ) -> bool: + return await 
self._delegate.add_transcript( + call_id=call_id, + turn_index=turn_index, + speaker=speaker, + content=content, + start_ms=start_ms, + end_ms=end_ms, + confidence=confidence, + duration_ms=duration_ms, + ) + + async def finalize_call_record( + self, + *, + call_id: str, + status: str, + duration_seconds: int, + ) -> bool: + return await self._delegate.finalize_call_record( + call_id=call_id, + status=status, + duration_seconds=duration_seconds, + ) + + async def search_knowledge_context( + self, + *, + kb_id: str, + query: str, + n_results: int = 5, + ) -> List[Dict[str, Any]]: + return await self._delegate.search_knowledge_context( + kb_id=kb_id, + query=query, + n_results=n_results, + ) + + async def fetch_tool_resource(self, tool_id: str) -> Optional[Dict[str, Any]]: + return await self._delegate.fetch_tool_resource(tool_id) + + class HttpBackendAdapter: """HTTP implementation of backend integration ports.""" @@ -322,36 +613,49 @@ def build_backend_adapter( backend_mode: str = "auto", history_enabled: bool = True, timeout_sec: int = 10, -) -> HttpBackendAdapter | NullBackendAdapter | HistoryDisabledBackendAdapter: + assistant_local_config_dir: str = "engine/config/agents", +) -> AssistantConfigSourceAdapter: """Create backend adapter implementation based on runtime settings.""" mode = str(backend_mode or "auto").strip().lower() has_url = bool(str(backend_url or "").strip()) base_adapter: HttpBackendAdapter | NullBackendAdapter + using_http_backend = False if mode in {"disabled", "off", "none", "null", "engine_only", "engine-only"}: base_adapter = NullBackendAdapter() elif mode == "http": if has_url: base_adapter = HttpBackendAdapter(backend_url=str(backend_url), timeout_sec=timeout_sec) + using_http_backend = True else: logger.warning("BACKEND_MODE=http but BACKEND_URL is empty; falling back to NullBackendAdapter") base_adapter = NullBackendAdapter() else: if has_url: base_adapter = HttpBackendAdapter(backend_url=str(backend_url), timeout_sec=timeout_sec) + 
using_http_backend = True else: base_adapter = NullBackendAdapter() + runtime_adapter: HttpBackendAdapter | NullBackendAdapter | HistoryDisabledBackendAdapter if not history_enabled: - return HistoryDisabledBackendAdapter(base_adapter) - return base_adapter + runtime_adapter = HistoryDisabledBackendAdapter(base_adapter) + else: + runtime_adapter = base_adapter + + return AssistantConfigSourceAdapter( + delegate=runtime_adapter, + local_delegate=LocalYamlAssistantConfigAdapter(assistant_local_config_dir), + use_backend_assistant_config=using_http_backend, + ) -def build_backend_adapter_from_settings() -> HttpBackendAdapter | NullBackendAdapter | HistoryDisabledBackendAdapter: +def build_backend_adapter_from_settings() -> AssistantConfigSourceAdapter: """Create backend adapter using current app settings.""" return build_backend_adapter( backend_url=settings.backend_url, backend_mode=settings.backend_mode, history_enabled=settings.history_enabled, timeout_sec=settings.backend_timeout_sec, + assistant_local_config_dir=settings.assistant_local_config_dir, ) diff --git a/engine/app/config.py b/engine/app/config.py index e81b852..d1ac72f 100644 --- a/engine/app/config.py +++ b/engine/app/config.py @@ -1,371 +1,31 @@ -"""Configuration management using Pydantic settings and agent YAML profiles.""" +"""Configuration management using Pydantic settings.""" import json import os -import re -import sys -from dataclasses import dataclass from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, List, Optional from pydantic import Field from pydantic_settings import BaseSettings, SettingsConfigDict try: - import yaml -except ImportError: # pragma: no cover - validated when agent YAML is used - yaml = None + from dotenv import load_dotenv +except ImportError: # pragma: no cover - optional dependency in some runtimes + load_dotenv = None + +def _prime_process_env_from_dotenv() -> None: + """Load .env into process env early.""" + if 
load_dotenv is None: + return + + cwd_env = Path.cwd() / ".env" + engine_env = Path(__file__).resolve().parent.parent / ".env" + load_dotenv(dotenv_path=cwd_env, override=False) + if engine_env != cwd_env: + load_dotenv(dotenv_path=engine_env, override=False) -_ENV_REF_PATTERN = re.compile(r"\$\{([A-Za-z_][A-Za-z0-9_]*)(?::([^}]*))?\}") -_DEFAULT_AGENT_CONFIG_DIR = "config/agents" -_DEFAULT_AGENT_CONFIG_FILE = "default.yaml" -_AGENT_SECTION_KEY_MAP: Dict[str, Dict[str, str]] = { - "vad": { - "type": "vad_type", - "model_path": "vad_model_path", - "threshold": "vad_threshold", - "min_speech_duration_ms": "vad_min_speech_duration_ms", - "eou_threshold_ms": "vad_eou_threshold_ms", - }, - "llm": { - "provider": "llm_provider", - "model": "llm_model", - "temperature": "llm_temperature", - "api_key": "llm_api_key", - "api_url": "llm_api_url", - }, - "tts": { - "provider": "tts_provider", - "api_key": "tts_api_key", - "api_url": "tts_api_url", - "model": "tts_model", - "voice": "tts_voice", - "dashscope_mode": "tts_mode", - "mode": "tts_mode", - "speed": "tts_speed", - }, - "asr": { - "provider": "asr_provider", - "api_key": "asr_api_key", - "api_url": "asr_api_url", - "model": "asr_model", - "interim_interval_ms": "asr_interim_interval_ms", - "min_audio_ms": "asr_min_audio_ms", - "start_min_speech_ms": "asr_start_min_speech_ms", - "pre_speech_ms": "asr_pre_speech_ms", - "final_tail_ms": "asr_final_tail_ms", - }, - "duplex": { - "enabled": "duplex_enabled", - "greeting": "duplex_greeting", - "system_prompt": "duplex_system_prompt", - "opener_audio_file": "duplex_opener_audio_file", - }, - "barge_in": { - "min_duration_ms": "barge_in_min_duration_ms", - "silence_tolerance_ms": "barge_in_silence_tolerance_ms", - }, -} -_AGENT_SETTING_KEYS = { - "vad_type", - "vad_model_path", - "vad_threshold", - "vad_min_speech_duration_ms", - "vad_eou_threshold_ms", - "llm_provider", - "llm_api_key", - "llm_api_url", - "llm_model", - "llm_temperature", - "tts_provider", - "tts_api_key", - 
"tts_api_url", - "tts_model", - "tts_voice", - "tts_mode", - "tts_speed", - "asr_provider", - "asr_api_key", - "asr_api_url", - "asr_model", - "asr_interim_interval_ms", - "asr_min_audio_ms", - "asr_start_min_speech_ms", - "asr_pre_speech_ms", - "asr_final_tail_ms", - "duplex_enabled", - "duplex_greeting", - "duplex_system_prompt", - "duplex_opener_audio_file", - "barge_in_min_duration_ms", - "barge_in_silence_tolerance_ms", - "tools", -} -_BASE_REQUIRED_AGENT_SETTING_KEYS = { - "vad_type", - "vad_model_path", - "vad_threshold", - "vad_min_speech_duration_ms", - "vad_eou_threshold_ms", - "llm_provider", - "llm_model", - "llm_temperature", - "tts_provider", - "tts_voice", - "tts_speed", - "asr_provider", - "asr_interim_interval_ms", - "asr_min_audio_ms", - "asr_start_min_speech_ms", - "asr_pre_speech_ms", - "asr_final_tail_ms", - "duplex_enabled", - "duplex_system_prompt", - "barge_in_min_duration_ms", - "barge_in_silence_tolerance_ms", -} -_OPENAI_COMPATIBLE_LLM_PROVIDERS = {"openai_compatible", "openai-compatible", "siliconflow"} -_OPENAI_COMPATIBLE_TTS_PROVIDERS = {"openai_compatible", "openai-compatible", "siliconflow"} -_DASHSCOPE_TTS_PROVIDERS = {"dashscope"} -_OPENAI_COMPATIBLE_ASR_PROVIDERS = {"openai_compatible", "openai-compatible", "siliconflow"} - - -def _normalized_provider(overrides: Dict[str, Any], key: str, default: str) -> str: - return str(overrides.get(key) or default).strip().lower() - - -def _is_blank(value: Any) -> bool: - return value is None or (isinstance(value, str) and not value.strip()) - - -@dataclass(frozen=True) -class AgentConfigSelection: - """Resolved agent config location and how it was selected.""" - - path: Optional[Path] - source: str - - -def _parse_cli_agent_args(argv: List[str]) -> Tuple[Optional[str], Optional[str]]: - """Parse only agent-related CLI flags from argv.""" - config_path: Optional[str] = None - profile: Optional[str] = None - i = 0 - while i < len(argv): - arg = argv[i] - if arg.startswith("--agent-config="): - 
config_path = arg.split("=", 1)[1].strip() or None - elif arg == "--agent-config" and i + 1 < len(argv): - config_path = argv[i + 1].strip() or None - i += 1 - elif arg.startswith("--agent-profile="): - profile = arg.split("=", 1)[1].strip() or None - elif arg == "--agent-profile" and i + 1 < len(argv): - profile = argv[i + 1].strip() or None - i += 1 - i += 1 - return config_path, profile - - -def _agent_config_dir() -> Path: - base_dir = Path(os.getenv("AGENT_CONFIG_DIR", _DEFAULT_AGENT_CONFIG_DIR)) - if not base_dir.is_absolute(): - base_dir = Path.cwd() / base_dir - return base_dir.resolve() - - -def _resolve_agent_selection( - agent_config_path: Optional[str] = None, - agent_profile: Optional[str] = None, - argv: Optional[List[str]] = None, -) -> AgentConfigSelection: - cli_path, cli_profile = _parse_cli_agent_args(list(argv if argv is not None else sys.argv[1:])) - path_value = agent_config_path or cli_path or os.getenv("AGENT_CONFIG_PATH") - profile_value = agent_profile or cli_profile or os.getenv("AGENT_PROFILE") - source = "none" - candidate: Optional[Path] = None - - if path_value: - source = "cli_path" if (agent_config_path or cli_path) else "env_path" - candidate = Path(path_value) - elif profile_value: - source = "cli_profile" if (agent_profile or cli_profile) else "env_profile" - candidate = _agent_config_dir() / f"{profile_value}.yaml" - else: - fallback = _agent_config_dir() / _DEFAULT_AGENT_CONFIG_FILE - if fallback.exists(): - source = "default" - candidate = fallback - - if candidate is None: - raise ValueError( - "Agent YAML config is required. Provide --agent-config/--agent-profile " - "or create config/agents/default.yaml." 
- ) - - if not candidate.is_absolute(): - candidate = (Path.cwd() / candidate).resolve() - else: - candidate = candidate.resolve() - - if not candidate.exists(): - raise ValueError(f"Agent config file not found ({source}): {candidate}") - if not candidate.is_file(): - raise ValueError(f"Agent config path is not a file: {candidate}") - return AgentConfigSelection(path=candidate, source=source) - - -def _resolve_env_refs(value: Any) -> Any: - """Resolve ${ENV_VAR} / ${ENV_VAR:default} placeholders recursively.""" - if isinstance(value, dict): - return {k: _resolve_env_refs(v) for k, v in value.items()} - if isinstance(value, list): - return [_resolve_env_refs(item) for item in value] - if not isinstance(value, str) or "${" not in value: - return value - - def _replace(match: re.Match[str]) -> str: - env_key = match.group(1) - default_value = match.group(2) - env_value = os.getenv(env_key) - if env_value is None: - if default_value is None: - raise ValueError(f"Missing environment variable referenced in agent YAML: {env_key}") - return default_value - return env_value - - return _ENV_REF_PATTERN.sub(_replace, value) - - -def _normalize_agent_overrides(raw: Dict[str, Any]) -> Dict[str, Any]: - """Normalize YAML into flat Settings fields.""" - normalized: Dict[str, Any] = {} - - for key, value in raw.items(): - if key == "siliconflow": - raise ValueError( - "Section 'siliconflow' is no longer supported. " - "Move provider-specific fields into agent.llm / agent.asr / agent.tts." 
- ) - if key == "tools": - if not isinstance(value, list): - raise ValueError("Agent config key 'tools' must be a list") - normalized["tools"] = value - continue - section_map = _AGENT_SECTION_KEY_MAP.get(key) - if section_map is None: - normalized[key] = value - continue - - if not isinstance(value, dict): - raise ValueError(f"Agent config section '{key}' must be a mapping") - - for nested_key, nested_value in value.items(): - mapped_key = section_map.get(nested_key) - if mapped_key is None: - raise ValueError(f"Unknown key in '{key}' section: '{nested_key}'") - normalized[mapped_key] = nested_value - - unknown_keys = sorted(set(normalized) - _AGENT_SETTING_KEYS) - if unknown_keys: - raise ValueError( - "Unknown agent config keys in YAML: " - + ", ".join(unknown_keys) - ) - return normalized - - -def _missing_required_keys(overrides: Dict[str, Any]) -> List[str]: - missing = set(_BASE_REQUIRED_AGENT_SETTING_KEYS - set(overrides)) - string_required = { - "vad_type", - "vad_model_path", - "llm_provider", - "llm_model", - "tts_provider", - "tts_voice", - "asr_provider", - "duplex_system_prompt", - } - for key in string_required: - if key in overrides and _is_blank(overrides.get(key)): - missing.add(key) - - llm_provider = _normalized_provider(overrides, "llm_provider", "openai") - if llm_provider in _OPENAI_COMPATIBLE_LLM_PROVIDERS or llm_provider == "openai": - if "llm_api_key" not in overrides or _is_blank(overrides.get("llm_api_key")): - missing.add("llm_api_key") - - tts_provider = _normalized_provider(overrides, "tts_provider", "openai_compatible") - if tts_provider in _OPENAI_COMPATIBLE_TTS_PROVIDERS: - if "tts_api_key" not in overrides or _is_blank(overrides.get("tts_api_key")): - missing.add("tts_api_key") - if "tts_api_url" not in overrides or _is_blank(overrides.get("tts_api_url")): - missing.add("tts_api_url") - if "tts_model" not in overrides or _is_blank(overrides.get("tts_model")): - missing.add("tts_model") - elif tts_provider in 
_DASHSCOPE_TTS_PROVIDERS: - if "tts_api_key" not in overrides or _is_blank(overrides.get("tts_api_key")): - missing.add("tts_api_key") - - asr_provider = _normalized_provider(overrides, "asr_provider", "openai_compatible") - if asr_provider in _OPENAI_COMPATIBLE_ASR_PROVIDERS: - if "asr_api_key" not in overrides or _is_blank(overrides.get("asr_api_key")): - missing.add("asr_api_key") - if "asr_api_url" not in overrides or _is_blank(overrides.get("asr_api_url")): - missing.add("asr_api_url") - if "asr_model" not in overrides or _is_blank(overrides.get("asr_model")): - missing.add("asr_model") - - return sorted(missing) - - -def _load_agent_overrides(selection: AgentConfigSelection) -> Dict[str, Any]: - if yaml is None: - raise RuntimeError( - "PyYAML is required for agent YAML configuration. Install with: pip install pyyaml" - ) - - with selection.path.open("r", encoding="utf-8") as file: - raw = yaml.safe_load(file) or {} - - if not isinstance(raw, dict): - raise ValueError(f"Agent config must be a YAML mapping: {selection.path}") - - if "agent" in raw: - agent_value = raw["agent"] - if not isinstance(agent_value, dict): - raise ValueError("The 'agent' key in YAML must be a mapping") - raw = agent_value - - resolved = _resolve_env_refs(raw) - overrides = _normalize_agent_overrides(resolved) - missing_required = _missing_required_keys(overrides) - if missing_required: - raise ValueError( - f"Missing required agent settings in YAML ({selection.path}): " - + ", ".join(missing_required) - ) - - overrides["agent_config_path"] = str(selection.path) - overrides["agent_config_source"] = selection.source - return overrides - - -def load_settings( - agent_config_path: Optional[str] = None, - agent_profile: Optional[str] = None, - argv: Optional[List[str]] = None, -) -> "Settings": - """Load settings from .env and optional agent YAML.""" - selection = _resolve_agent_selection( - agent_config_path=agent_config_path, - agent_profile=agent_profile, - argv=argv, - ) - 
agent_overrides = _load_agent_overrides(selection) - return Settings(**agent_overrides) +_prime_process_env_from_dotenv() class Settings(BaseSettings): @@ -404,7 +64,6 @@ class Settings(BaseSettings): default="openai", description="LLM provider (openai, openai_compatible, siliconflow)" ) - llm_api_key: Optional[str] = Field(default=None, description="LLM provider API key") llm_api_url: Optional[str] = Field(default=None, description="LLM provider API base URL") llm_model: str = Field(default="gpt-4o-mini", description="LLM model name") llm_temperature: float = Field(default=0.7, description="LLM temperature for response generation") @@ -414,7 +73,6 @@ class Settings(BaseSettings): default="openai_compatible", description="TTS provider (edge, openai_compatible, siliconflow, dashscope)" ) - tts_api_key: Optional[str] = Field(default=None, description="TTS provider API key") tts_api_url: Optional[str] = Field(default=None, description="TTS provider API URL") tts_model: Optional[str] = Field(default=None, description="TTS model name") tts_voice: str = Field(default="anna", description="TTS voice name") @@ -429,7 +87,6 @@ class Settings(BaseSettings): default="openai_compatible", description="ASR provider (openai_compatible, buffered, siliconflow)" ) - asr_api_key: Optional[str] = Field(default=None, description="ASR provider API key") asr_api_url: Optional[str] = Field(default=None, description="ASR provider API URL") asr_model: Optional[str] = Field(default=None, description="ASR model name") asr_interim_interval_ms: int = Field(default=500, description="Interval for interim ASR results in ms") @@ -505,6 +162,10 @@ class Settings(BaseSettings): ) backend_url: Optional[str] = Field(default=None, description="Backend API base URL (e.g. 
http://localhost:8787)") backend_timeout_sec: int = Field(default=10, description="Backend API request timeout in seconds") + assistant_local_config_dir: str = Field( + default="engine/config/agents", + description="Directory containing local assistant runtime YAML files" + ) history_enabled: bool = Field(default=True, description="Enable history write bridge") history_default_user_id: int = Field(default=1, description="Fallback user_id for history records") history_queue_max_size: int = Field(default=256, description="Max buffered transcript writes per session") @@ -515,10 +176,6 @@ class Settings(BaseSettings): description="Max wait before finalizing history when queue is still draining" ) - # Agent YAML metadata - agent_config_path: Optional[str] = Field(default=None, description="Resolved agent YAML path") - agent_config_source: str = Field(default="none", description="How the agent YAML was selected") - @property def chunk_size_bytes(self) -> int: """Calculate chunk size in bytes based on sample rate and duration.""" @@ -543,7 +200,7 @@ class Settings(BaseSettings): # Global settings instance -settings = load_settings() +settings = Settings() def get_settings() -> Settings: diff --git a/engine/app/main.py b/engine/app/main.py index b8a39bb..b4c5c05 100644 --- a/engine/app/main.py +++ b/engine/app/main.py @@ -371,12 +371,10 @@ async def startup_event(): logger.info(f"Server: {settings.host}:{settings.port}") logger.info(f"Sample rate: {settings.sample_rate} Hz") logger.info(f"VAD model: {settings.vad_model_path}") - if settings.agent_config_path: - logger.info( - f"Agent config loaded ({settings.agent_config_source}): {settings.agent_config_path}" - ) - else: - logger.info("Agent config: none (using .env/default agent values)") + logger.info( + "Assistant runtime config source: backend when BACKEND_URL is set, " + "otherwise local YAML by assistant_id from ASSISTANT_LOCAL_CONFIG_DIR" + ) @app.on_event("shutdown") diff --git a/engine/core/duplex_pipeline.py 
b/engine/core/duplex_pipeline.py index 13f1852..d6c81ee 100644 --- a/engine/core/duplex_pipeline.py +++ b/engine/core/duplex_pipeline.py @@ -893,7 +893,7 @@ class DuplexPipeline: # Connect LLM service if not self.llm_service: llm_provider = (self._runtime_llm.get("provider") or settings.llm_provider).lower() - llm_api_key = self._runtime_llm.get("apiKey") or settings.llm_api_key + llm_api_key = self._runtime_llm.get("apiKey") llm_base_url = ( self._runtime_llm.get("baseUrl") or settings.llm_api_url @@ -926,7 +926,7 @@ class DuplexPipeline: if tts_output_enabled: if not self.tts_service: tts_provider = (self._runtime_tts.get("provider") or settings.tts_provider).lower() - tts_api_key = self._runtime_tts.get("apiKey") or settings.tts_api_key + tts_api_key = self._runtime_tts.get("apiKey") tts_api_url = self._runtime_tts.get("baseUrl") or settings.tts_api_url tts_voice = self._runtime_tts.get("voice") or settings.tts_voice tts_model = self._runtime_tts.get("model") or settings.tts_model @@ -982,7 +982,7 @@ class DuplexPipeline: # Connect ASR service if not self.asr_service: asr_provider = (self._runtime_asr.get("provider") or settings.asr_provider).lower() - asr_api_key = self._runtime_asr.get("apiKey") or settings.asr_api_key + asr_api_key = self._runtime_asr.get("apiKey") asr_api_url = self._runtime_asr.get("baseUrl") or settings.asr_api_url asr_model = self._runtime_asr.get("model") or settings.asr_model asr_interim_interval = int(self._runtime_asr.get("interimIntervalMs") or settings.asr_interim_interval_ms) diff --git a/engine/docs/backend_integration.md b/engine/docs/backend_integration.md index 1f5d14d..e8165fd 100644 --- a/engine/docs/backend_integration.md +++ b/engine/docs/backend_integration.md @@ -10,6 +10,7 @@ Configure with environment variables: - `BACKEND_MODE=auto|http|disabled` - `BACKEND_URL` - `BACKEND_TIMEOUT_SEC` +- `ASSISTANT_LOCAL_CONFIG_DIR` (default: `engine/config/agents`) - `HISTORY_ENABLED=true|false` Mode behavior: @@ -18,6 +19,12 @@ Mode 
behavior: - `http`: force HTTP backend adapter (falls back to null adapter when URL is missing). - `disabled`: force null adapter and run engine-only. +Assistant config source behavior: + +- If `BACKEND_URL` exists and backend mode is enabled, fetch assistant config from backend. +- If `BACKEND_URL` is missing (or backend mode is disabled), load assistant config from local YAML. +- `assistant_id` query parameter is still required and maps to `engine/config/agents/<assistant_id>.yaml` when local YAML source is active. + ## Architecture - Ports: `core/ports/backend.py` diff --git a/engine/examples/wav_client.py b/engine/examples/wav_client.py index 1e4a50d..7e4aef1 100644 --- a/engine/examples/wav_client.py +++ b/engine/examples/wav_client.py @@ -58,7 +58,7 @@ class WavFileClient: url: str, input_file: str, output_file: str, - assistant_id: str = "assistant_demo", + assistant_id: str = "default", channel: str = "wav_client", sample_rate: int = 16000, chunk_duration_ms: int = 20, @@ -520,7 +520,7 @@ async def main(): ) parser.add_argument( "--assistant-id", - default="assistant_demo", + default="default", help="Assistant identifier used in websocket query parameter" ) parser.add_argument( diff --git a/engine/scripts/generate_test_audio/.env.example b/engine/scripts/generate_test_audio/.env.example new file mode 100644 index 0000000..72b7707 --- /dev/null +++ b/engine/scripts/generate_test_audio/.env.example @@ -0,0 +1 @@ +SILICONFLOW_API_KEY=your_siliconflow_api_key_here \ No newline at end of file diff --git a/engine/services/dashscope_tts.py b/engine/services/dashscope_tts.py index 6d89221..1ddcbff 100644 --- a/engine/services/dashscope_tts.py +++ b/engine/services/dashscope_tts.py @@ -89,7 +89,7 @@ class DashScopeTTSService(BaseTTSService): speed: float = 1.0, ): super().__init__(voice=voice, sample_rate=sample_rate, speed=speed) - self.api_key = api_key or os.getenv("DASHSCOPE_API_KEY") or os.getenv("TTS_API_KEY") + self.api_key = api_key self.api_url = ( api_url or 
os.getenv("DASHSCOPE_TTS_API_URL") diff --git a/engine/services/llm.py b/engine/services/llm.py index eb7f89c..c4d539e 100644 --- a/engine/services/llm.py +++ b/engine/services/llm.py @@ -44,13 +44,13 @@ class OpenAILLMService(BaseLLMService): Args: model: Model name (e.g., "gpt-4o-mini", "gpt-4o") - api_key: Provider API key (defaults to LLM_API_KEY/OPENAI_API_KEY env vars) + api_key: Provider API key base_url: Custom API base URL (for Azure or compatible APIs) system_prompt: Default system prompt for conversations """ super().__init__(model=model) - self.api_key = api_key or os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY") + self.api_key = api_key self.base_url = base_url or os.getenv("LLM_API_URL") or os.getenv("OPENAI_API_URL") self.system_prompt = system_prompt or ( "You are a helpful, friendly voice assistant. " diff --git a/engine/services/openai_compatible_asr.py b/engine/services/openai_compatible_asr.py index 7972189..182d7a0 100644 --- a/engine/services/openai_compatible_asr.py +++ b/engine/services/openai_compatible_asr.py @@ -75,7 +75,7 @@ class OpenAICompatibleASRService(BaseASRService): if not AIOHTTP_AVAILABLE: raise RuntimeError("aiohttp is required for OpenAICompatibleASRService") - self.api_key = api_key or os.getenv("ASR_API_KEY") or os.getenv("SILICONFLOW_API_KEY") + self.api_key = api_key raw_api_url = api_url or os.getenv("ASR_API_URL") or self.API_URL self.api_url = self._resolve_transcriptions_endpoint(raw_api_url) self.model = self.MODELS.get(model.lower(), model) diff --git a/engine/services/openai_compatible_tts.py b/engine/services/openai_compatible_tts.py index b2dc30d..41e3e45 100644 --- a/engine/services/openai_compatible_tts.py +++ b/engine/services/openai_compatible_tts.py @@ -49,7 +49,7 @@ class OpenAICompatibleTTSService(BaseTTSService): Initialize OpenAI-compatible TTS service. 
Args: - api_key: Provider API key (defaults to TTS_API_KEY/SILICONFLOW_API_KEY env vars) + api_key: Provider API key api_url: Provider API URL (defaults to SiliconFlow endpoint) voice: Voice name (alex, anna, bella, benjamin, charles, claire, david, diana) model: Model name @@ -73,7 +73,7 @@ class OpenAICompatibleTTSService(BaseTTSService): super().__init__(voice=full_voice, sample_rate=sample_rate, speed=speed) - self.api_key = api_key or os.getenv("TTS_API_KEY") or os.getenv("SILICONFLOW_API_KEY") + self.api_key = api_key self.model = model raw_api_url = api_url or os.getenv("TTS_API_URL") or "https://api.siliconflow.cn/v1/audio/speech" self.api_url = self._resolve_speech_endpoint(raw_api_url) diff --git a/engine/services/realtime.py b/engine/services/realtime.py index 3fd95c1..142f018 100644 --- a/engine/services/realtime.py +++ b/engine/services/realtime.py @@ -13,7 +13,6 @@ The Realtime API provides: - Barge-in/interruption handling """ -import os import asyncio import json import base64 @@ -98,7 +97,6 @@ class RealtimeService: config: Realtime configuration (uses defaults if not provided) """ self.config = config or RealtimeConfig() - self.config.api_key = self.config.api_key or os.getenv("OPENAI_API_KEY") self.state = RealtimeState.DISCONNECTED self._ws = None diff --git a/engine/tests/test_agent_config.py b/engine/tests/test_agent_config.py index 6432581..90bb277 100644 --- a/engine/tests/test_agent_config.py +++ b/engine/tests/test_agent_config.py @@ -1,293 +1,21 @@ -import os -from pathlib import Path +import importlib -import pytest -os.environ.setdefault("LLM_API_KEY", "test-openai-key") -os.environ.setdefault("TTS_API_KEY", "test-tts-key") -os.environ.setdefault("ASR_API_KEY", "test-asr-key") +def test_settings_load_from_environment(monkeypatch): + monkeypatch.setenv("HOST", "127.0.0.1") + monkeypatch.setenv("PORT", "8123") -from app.config import load_settings + import app.config as config_module + importlib.reload(config_module) + settings = 
config_module.get_settings() + assert settings.host == "127.0.0.1" + assert settings.port == 8123 -def _write_yaml(path: Path, content: str) -> None: - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(content, encoding="utf-8") +def test_assistant_local_config_dir_default_present(): + import app.config as config_module -def _full_agent_yaml(llm_model: str = "gpt-4o-mini", llm_key: str = "test-openai-key") -> str: - return f""" -agent: - vad: - type: silero - model_path: data/vad/silero_vad.onnx - threshold: 0.63 - min_speech_duration_ms: 100 - eou_threshold_ms: 800 - - llm: - provider: openai_compatible - model: {llm_model} - temperature: 0.2 - api_key: {llm_key} - api_url: https://example-llm.invalid/v1 - - tts: - provider: openai_compatible - api_key: test-tts-key - api_url: https://example-tts.invalid/v1/audio/speech - model: FunAudioLLM/CosyVoice2-0.5B - voice: anna - speed: 1.0 - - asr: - provider: openai_compatible - api_key: test-asr-key - api_url: https://example-asr.invalid/v1/audio/transcriptions - model: FunAudioLLM/SenseVoiceSmall - interim_interval_ms: 500 - min_audio_ms: 300 - start_min_speech_ms: 160 - pre_speech_ms: 240 - final_tail_ms: 120 - - duplex: - enabled: true - system_prompt: You are a strict test assistant. 
- - barge_in: - min_duration_ms: 200 - silence_tolerance_ms: 60 -""".strip() - - -def _dashscope_tts_yaml() -> str: - return _full_agent_yaml().replace( - """ tts: - provider: openai_compatible - api_key: test-tts-key - api_url: https://example-tts.invalid/v1/audio/speech - model: FunAudioLLM/CosyVoice2-0.5B - voice: anna - speed: 1.0 -""", - """ tts: - provider: dashscope - api_key: test-dashscope-key - voice: Cherry - speed: 1.0 -""", - ) - - -def test_cli_profile_loads_agent_yaml(monkeypatch, tmp_path): - monkeypatch.chdir(tmp_path) - config_dir = tmp_path / "config" / "agents" - _write_yaml( - config_dir / "support.yaml", - _full_agent_yaml(llm_model="gpt-4.1-mini"), - ) - - settings = load_settings( - argv=["--agent-profile", "support"], - ) - - assert settings.llm_model == "gpt-4.1-mini" - assert settings.llm_temperature == 0.2 - assert settings.vad_threshold == 0.63 - assert settings.agent_config_source == "cli_profile" - assert settings.agent_config_path == str((config_dir / "support.yaml").resolve()) - - -def test_cli_path_has_higher_priority_than_env(monkeypatch, tmp_path): - monkeypatch.chdir(tmp_path) - env_file = tmp_path / "config" / "agents" / "env.yaml" - cli_file = tmp_path / "config" / "agents" / "cli.yaml" - - _write_yaml(env_file, _full_agent_yaml(llm_model="env-model")) - _write_yaml(cli_file, _full_agent_yaml(llm_model="cli-model")) - - monkeypatch.setenv("AGENT_CONFIG_PATH", str(env_file)) - - settings = load_settings(argv=["--agent-config", str(cli_file)]) - - assert settings.llm_model == "cli-model" - assert settings.agent_config_source == "cli_path" - assert settings.agent_config_path == str(cli_file.resolve()) - - -def test_default_yaml_is_loaded_without_args_or_env(monkeypatch, tmp_path): - monkeypatch.chdir(tmp_path) - default_file = tmp_path / "config" / "agents" / "default.yaml" - _write_yaml(default_file, _full_agent_yaml(llm_model="from-default")) - - monkeypatch.delenv("AGENT_CONFIG_PATH", raising=False) - 
monkeypatch.delenv("AGENT_PROFILE", raising=False) - - settings = load_settings(argv=[]) - - assert settings.llm_model == "from-default" - assert settings.agent_config_source == "default" - assert settings.agent_config_path == str(default_file.resolve()) - - -def test_missing_required_agent_settings_fail(monkeypatch, tmp_path): - monkeypatch.chdir(tmp_path) - file_path = tmp_path / "missing-required.yaml" - _write_yaml( - file_path, - """ -agent: - llm: - model: gpt-4o-mini -""".strip(), - ) - - with pytest.raises(ValueError, match="Missing required agent settings in YAML"): - load_settings(argv=["--agent-config", str(file_path)]) - - -def test_blank_required_provider_key_fails(monkeypatch, tmp_path): - monkeypatch.chdir(tmp_path) - file_path = tmp_path / "blank-key.yaml" - _write_yaml(file_path, _full_agent_yaml(llm_key="")) - - with pytest.raises(ValueError, match="Missing required agent settings in YAML"): - load_settings(argv=["--agent-config", str(file_path)]) - - -def test_missing_tts_api_url_fails(monkeypatch, tmp_path): - monkeypatch.chdir(tmp_path) - file_path = tmp_path / "missing-tts-url.yaml" - _write_yaml( - file_path, - _full_agent_yaml().replace( - " api_url: https://example-tts.invalid/v1/audio/speech\n", - "", - ), - ) - - with pytest.raises(ValueError, match="Missing required agent settings in YAML"): - load_settings(argv=["--agent-config", str(file_path)]) - - -def test_dashscope_tts_allows_default_url_and_model(monkeypatch, tmp_path): - monkeypatch.chdir(tmp_path) - file_path = tmp_path / "dashscope-tts.yaml" - _write_yaml(file_path, _dashscope_tts_yaml()) - - settings = load_settings(argv=["--agent-config", str(file_path)]) - - assert settings.tts_provider == "dashscope" - assert settings.tts_api_key == "test-dashscope-key" - assert settings.tts_api_url is None - assert settings.tts_model is None - - -def test_dashscope_tts_requires_api_key(monkeypatch, tmp_path): - monkeypatch.chdir(tmp_path) - file_path = tmp_path / 
"dashscope-tts-missing-key.yaml" - _write_yaml(file_path, _dashscope_tts_yaml().replace(" api_key: test-dashscope-key\n", "")) - - with pytest.raises(ValueError, match="Missing required agent settings in YAML"): - load_settings(argv=["--agent-config", str(file_path)]) - - -def test_missing_asr_api_url_fails(monkeypatch, tmp_path): - monkeypatch.chdir(tmp_path) - file_path = tmp_path / "missing-asr-url.yaml" - _write_yaml( - file_path, - _full_agent_yaml().replace( - " api_url: https://example-asr.invalid/v1/audio/transcriptions\n", - "", - ), - ) - - with pytest.raises(ValueError, match="Missing required agent settings in YAML"): - load_settings(argv=["--agent-config", str(file_path)]) - - -def test_agent_yaml_unknown_key_fails(monkeypatch, tmp_path): - monkeypatch.chdir(tmp_path) - file_path = tmp_path / "bad-agent.yaml" - _write_yaml(file_path, _full_agent_yaml() + "\n unknown_option: true") - - with pytest.raises(ValueError, match="Unknown agent config keys"): - load_settings(argv=["--agent-config", str(file_path)]) - - -def test_legacy_siliconflow_section_fails(monkeypatch, tmp_path): - monkeypatch.chdir(tmp_path) - file_path = tmp_path / "legacy-siliconflow.yaml" - _write_yaml( - file_path, - """ -agent: - siliconflow: - api_key: x -""".strip(), - ) - - with pytest.raises(ValueError, match="Section 'siliconflow' is no longer supported"): - load_settings(argv=["--agent-config", str(file_path)]) - - -def test_agent_yaml_missing_env_reference_fails(monkeypatch, tmp_path): - monkeypatch.chdir(tmp_path) - file_path = tmp_path / "bad-ref.yaml" - _write_yaml( - file_path, - _full_agent_yaml(llm_key="${UNSET_LLM_API_KEY}"), - ) - - with pytest.raises(ValueError, match="Missing environment variable"): - load_settings(argv=["--agent-config", str(file_path)]) - - -def test_agent_yaml_tools_list_is_loaded(monkeypatch, tmp_path): - monkeypatch.chdir(tmp_path) - file_path = tmp_path / "tools-agent.yaml" - _write_yaml( - file_path, - _full_agent_yaml() - + """ - - tools: - - 
current_time - - name: weather - description: Get weather by city. - parameters: - type: object - properties: - city: - type: string - required: [city] - executor: server -""", - ) - - settings = load_settings(argv=["--agent-config", str(file_path)]) - - assert isinstance(settings.tools, list) - assert settings.tools[0] == "current_time" - assert settings.tools[1]["name"] == "weather" - assert settings.tools[1]["executor"] == "server" - - -def test_agent_yaml_tools_must_be_list(monkeypatch, tmp_path): - monkeypatch.chdir(tmp_path) - file_path = tmp_path / "bad-tools-agent.yaml" - _write_yaml( - file_path, - _full_agent_yaml() - + """ - - tools: - weather: - executor: server -""", - ) - - with pytest.raises(ValueError, match="Agent config key 'tools' must be a list"): - load_settings(argv=["--agent-config", str(file_path)]) + settings = config_module.get_settings() + assert isinstance(settings.assistant_local_config_dir, str) + assert settings.assistant_local_config_dir diff --git a/engine/tests/test_backend_adapters.py b/engine/tests/test_backend_adapters.py index d55f5e2..347df45 100644 --- a/engine/tests/test_backend_adapters.py +++ b/engine/tests/test_backend_adapters.py @@ -2,24 +2,42 @@ import aiohttp import pytest from app.backend_adapters import ( - HistoryDisabledBackendAdapter, - HttpBackendAdapter, - NullBackendAdapter, + AssistantConfigSourceAdapter, + LocalYamlAssistantConfigAdapter, build_backend_adapter, ) @pytest.mark.asyncio -async def test_build_backend_adapter_without_url_returns_null_adapter(): +async def test_without_backend_url_uses_local_yaml_for_assistant_config(tmp_path): + config_dir = tmp_path / "assistants" + config_dir.mkdir(parents=True, exist_ok=True) + (config_dir / "dev_local.yaml").write_text( + "\n".join( + [ + "assistant:", + " assistantId: dev_local", + " systemPrompt: local prompt", + " greeting: local greeting", + ] + ), + encoding="utf-8", + ) + adapter = build_backend_adapter( backend_url=None, backend_mode="auto", 
history_enabled=True, timeout_sec=3, + assistant_local_config_dir=str(config_dir), ) - assert isinstance(adapter, NullBackendAdapter) + assert isinstance(adapter, AssistantConfigSourceAdapter) - assert await adapter.fetch_assistant_config("assistant_1") is None + payload = await adapter.fetch_assistant_config("dev_local") + assert isinstance(payload, dict) + assert payload.get("__error_code") in (None, "") + assert payload["assistant"]["assistantId"] == "dev_local" + assert payload["assistant"]["systemPrompt"] == "local prompt" assert ( await adapter.create_call_record( user_id=1, @@ -54,7 +72,7 @@ async def test_build_backend_adapter_without_url_returns_null_adapter(): @pytest.mark.asyncio -async def test_http_backend_adapter_create_call_record_posts_expected_payload(monkeypatch): +async def test_http_backend_adapter_create_call_record_posts_expected_payload(monkeypatch, tmp_path): captured = {} class _FakeResponse: @@ -90,15 +108,31 @@ async def test_http_backend_adapter_create_call_record_posts_expected_payload(mo captured["json"] = json return _FakeResponse(status=200, payload={"id": "call_123"}) + def get(self, url): + _ = url + return _FakeResponse( + status=200, + payload={ + "assistant": { + "assistantId": "assistant_9", + "systemPrompt": "backend prompt", + } + }, + ) + monkeypatch.setattr("app.backend_adapters.aiohttp.ClientSession", _FakeClientSession) + config_dir = tmp_path / "assistants" + config_dir.mkdir(parents=True, exist_ok=True) + adapter = build_backend_adapter( backend_url="http://localhost:8100", backend_mode="auto", history_enabled=True, timeout_sec=7, + assistant_local_config_dir=str(config_dir), ) - assert isinstance(adapter, HttpBackendAdapter) + assert isinstance(adapter, AssistantConfigSourceAdapter) call_id = await adapter.create_call_record( user_id=99, @@ -119,25 +153,115 @@ async def test_http_backend_adapter_create_call_record_posts_expected_payload(mo @pytest.mark.asyncio -async def 
test_backend_mode_disabled_forces_null_even_with_url(): +async def test_with_backend_url_uses_backend_for_assistant_config(monkeypatch, tmp_path): + class _FakeResponse: + def __init__(self, status=200, payload=None): + self.status = status + self._payload = payload if payload is not None else {} + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc, tb): + return None + + async def json(self): + return self._payload + + def raise_for_status(self): + if self.status >= 400: + raise RuntimeError("http_error") + + class _FakeClientSession: + def __init__(self, timeout=None): + self.timeout = timeout + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc, tb): + return None + + def get(self, url): + _ = url + return _FakeResponse( + status=200, + payload={ + "assistant": { + "assistantId": "dev_http", + "systemPrompt": "backend prompt", + } + }, + ) + + def post(self, url, json=None): + _ = (url, json) + return _FakeResponse(status=200, payload={"id": "call_1"}) + + monkeypatch.setattr("app.backend_adapters.aiohttp.ClientSession", _FakeClientSession) + + config_dir = tmp_path / "assistants" + config_dir.mkdir(parents=True, exist_ok=True) + (config_dir / "dev_http.yaml").write_text( + "\n".join( + [ + "assistant:", + " assistantId: dev_http", + " systemPrompt: local prompt", + ] + ), + encoding="utf-8", + ) + + adapter = build_backend_adapter( + backend_url="http://localhost:8100", + backend_mode="auto", + history_enabled=True, + timeout_sec=3, + assistant_local_config_dir=str(config_dir), + ) + assert isinstance(adapter, AssistantConfigSourceAdapter) + + payload = await adapter.fetch_assistant_config("dev_http") + assert payload["assistant"]["assistantId"] == "dev_http" + assert payload["assistant"]["systemPrompt"] == "backend prompt" + + +@pytest.mark.asyncio +async def test_backend_mode_disabled_uses_local_assistant_config_even_with_url(monkeypatch, tmp_path): + class _FailIfCalledClientSession: + 
def __init__(self, timeout=None): + _ = timeout + raise AssertionError("HTTP client should not be created when backend_mode=disabled") + + monkeypatch.setattr("app.backend_adapters.aiohttp.ClientSession", _FailIfCalledClientSession) + + config_dir = tmp_path / "assistants" + config_dir.mkdir(parents=True, exist_ok=True) + (config_dir / "dev_disabled.yaml").write_text( + "\n".join( + [ + "assistant:", + " assistantId: dev_disabled", + " systemPrompt: local disabled prompt", + ] + ), + encoding="utf-8", + ) + adapter = build_backend_adapter( backend_url="http://localhost:8100", backend_mode="disabled", history_enabled=True, - timeout_sec=7, + timeout_sec=3, + assistant_local_config_dir=str(config_dir), ) - assert isinstance(adapter, NullBackendAdapter) + assert isinstance(adapter, AssistantConfigSourceAdapter) + payload = await adapter.fetch_assistant_config("dev_disabled") + assert payload["assistant"]["assistantId"] == "dev_disabled" + assert payload["assistant"]["systemPrompt"] == "local disabled prompt" -@pytest.mark.asyncio -async def test_history_disabled_wraps_backend_adapter(): - adapter = build_backend_adapter( - backend_url="http://localhost:8100", - backend_mode="auto", - history_enabled=False, - timeout_sec=7, - ) - assert isinstance(adapter, HistoryDisabledBackendAdapter) assert await adapter.create_call_record(user_id=1, assistant_id="a1", source="debug") is None assert await adapter.add_transcript( call_id="c1", @@ -148,3 +272,53 @@ async def test_history_disabled_wraps_backend_adapter(): end_ms=10, duration_ms=10, ) is False + + +@pytest.mark.asyncio +async def test_local_yaml_adapter_rejects_path_traversal_like_assistant_id(tmp_path): + adapter = LocalYamlAssistantConfigAdapter(str(tmp_path)) + payload = await adapter.fetch_assistant_config("../etc/passwd") + assert payload == {"__error_code": "assistant.not_found", "assistantId": "../etc/passwd"} + + +@pytest.mark.asyncio +async def 
test_local_yaml_translates_agent_schema_to_runtime_services(tmp_path): + config_dir = tmp_path / "assistants" + config_dir.mkdir(parents=True, exist_ok=True) + (config_dir / "default.yaml").write_text( + "\n".join( + [ + "agent:", + " llm:", + " provider: openai", + " model: gpt-4o-mini", + " api_key: sk-llm", + " api_url: https://api.example.com/v1", + " tts:", + " provider: openai_compatible", + " model: tts-model", + " api_key: sk-tts", + " api_url: https://tts.example.com/v1/audio/speech", + " voice: anna", + " asr:", + " provider: openai_compatible", + " model: asr-model", + " api_key: sk-asr", + " api_url: https://asr.example.com/v1/audio/transcriptions", + " duplex:", + " system_prompt: You are test assistant", + ] + ), + encoding="utf-8", + ) + + adapter = LocalYamlAssistantConfigAdapter(str(config_dir)) + payload = await adapter.fetch_assistant_config("default") + + assert isinstance(payload, dict) + assistant = payload.get("assistant", {}) + services = assistant.get("services", {}) + assert services.get("llm", {}).get("apiKey") == "sk-llm" + assert services.get("tts", {}).get("apiKey") == "sk-tts" + assert services.get("asr", {}).get("apiKey") == "sk-asr" + assert assistant.get("systemPrompt") == "You are test assistant" diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..d7d3b64 --- /dev/null +++ b/examples/README.md @@ -0,0 +1 @@ +# Example Application using RAS \ No newline at end of file