# AI-VideoAssistant/engine/config/agents/tools.yaml

# Agent behavior configuration with tool declarations.
# This profile is an example only.

agent:
  # Voice activity detection (VAD) settings.
  vad:
    type: 'silero'
    # Location of the ONNX model file used by the detector.
    model_path: 'data/vad/silero_vad.onnx'
    # NOTE(review): presumably a speech-probability cutoff in [0, 1] - confirm.
    threshold: 0.5
    # The remaining values are durations in milliseconds (per the _ms suffix).
    min_speech_duration_ms: 100
    # NOTE(review): "eou" presumably stands for end-of-utterance - confirm.
    eou_threshold_ms: 800

  # Large language model backend.
  llm:
    # Accepted provider values: openai | openai_compatible | siliconflow
    provider: 'openai_compatible'
    model: 'deepseek-v3'
    temperature: 0.7
    # Placeholder only; supply a real key locally and keep it out of
    # version control.
    api_key: 'your_llm_api_key'
    api_url: 'https://api.qnaigc.com/v1'

  # Text-to-speech backend.
  tts:
    # Accepted provider values:
    #   openai_compatible | siliconflow | dashscope | volcengine
    #
    # provider=dashscope - defaults applied when a key is omitted:
    #   api_url: wss://dashscope.aliyuncs.com/api-ws/v1/realtime
    #   model: qwen3-tts-flash-realtime
    # provider=dashscope - dashscope_mode accepts:
    #   commit (engine splits) | server_commit (dashscope splits)
    # dashscope_mode/mode is ONLY used when provider=dashscope.
    #
    # provider=volcengine - defaults applied when a key is omitted:
    #   api_url: https://openspeech.bytedance.com/api/v3/tts/unidirectional
    #   resource_id: seed-tts-2.0
    # provider=volcengine - credential keys:
    #   app_id: your volcengine app key
    #   api_key: your volcengine access key
    provider: 'openai_compatible'
    # Placeholder only; supply a real key locally and keep it out of
    # version control.
    api_key: 'your_tts_api_key'
    api_url: 'https://api.siliconflow.cn/v1/audio/speech'
    model: 'FunAudioLLM/CosyVoice2-0.5B'
    voice: 'anna'
    speed: 1.0

  # Automatic speech recognition backend.
  asr:
    # Accepted provider values:
    #   buffered | openai_compatible | siliconflow | dashscope | volcengine
    #
    # provider=dashscope - defaults applied when a key is omitted:
    #   api_url: wss://dashscope.aliyuncs.com/api-ws/v1/realtime
    #   model: qwen3-asr-flash-realtime
    # dashscope uses streaming ASR mode (chunk-by-chunk).
    #
    # provider=volcengine - defaults applied when a key is omitted:
    #   api_url: wss://openspeech.bytedance.com/api/v3/sauc/bigmodel
    #   model: bigmodel
    #   resource_id: volc.bigasr.sauc.duration
    #   request_params:
    #     end_window_size: 800
    #     force_to_speech_time: 1000
    # provider=volcengine - credential keys:
    #   app_id: your volcengine app key
    #   api_key: your volcengine access key
    # volcengine uses streaming ASR mode (chunk-by-chunk).
    provider: 'openai_compatible'
    # Placeholder only; supply a real key locally and keep it out of
    # version control.
    api_key: 'your_asr_api_key'
    api_url: 'https://api.siliconflow.cn/v1/audio/transcriptions'
    model: 'FunAudioLLM/SenseVoiceSmall'
    enable_interim: false
    # All values below are durations in milliseconds (per the _ms suffix).
    interim_interval_ms: 500
    min_audio_ms: 300
    start_min_speech_ms: 160
    pre_speech_ms: 240
    final_tail_ms: 120
  # Duplex mode settings.
  duplex:
    enabled: true
    # Folded scalar with strip chomping (>-): parses to the single-line
    # prompt text with no trailing newline.
    system_prompt: >-
      You are a helpful voice assistant with tool-calling support.

  # Barge-in settings.
  # NOTE(review): presumably controls when user speech may interrupt the
  # assistant's output - confirm against the engine.
  barge_in:
    # Both values are durations in milliseconds (per the _ms suffix).
    min_duration_ms: 200
    silence_tolerance_ms: 60

  # Tool declarations consumed by the engine at startup.
  # Each list entry takes one of two forms:
  # - String form: enables the built-in/default tool schema when available.
  # - Object form: provides an OpenAI function schema plus an executor hint.
  tools:
    # String-form entries.
    - current_time
    - calculator
    # Object-form entry with executor: server.
    - name: weather
      description: Get weather by city name.
      parameters:
        type: object
        properties:
          city:
            type: string
            # Single-quoted because the text embeds double quotes.
            description: 'City name, for example "San Francisco".'
        required: [city]
      executor: server
    # Object-form entry with executor: client.
    - name: open_map
      description: Open map app on the client device.
      parameters:
        type: object
        properties:
          query:
            type: string
            # Describes the argument for the model, matching weather.city.
            # NOTE(review): wording is a suggestion - adjust it to what the
            # client-side map handler actually accepts.
            description: Search text to open in the map app.
        required: [query]
      executor: client