Add presence probe configuration to Assistant model and API. Introduce new fields for enabling presence probes, idle and cooldown durations, maximum prompts, context inclusion, and custom questions. Update schemas, routers, and frontend components to support these features, along with corresponding tests that verify the new probe behavior.

This commit is contained in:
Xin Wang
2026-02-28 15:47:53 +08:00
parent 0821d73e7c
commit 8f1317860f
11 changed files with 1006 additions and 3 deletions

View File

@@ -1,5 +1,6 @@
import asyncio
import json
import time
from typing import Any, Dict, List
import pytest
@@ -86,7 +87,12 @@ def _build_pipeline(monkeypatch, llm_rounds: List[List[LLMStreamEvent]]) -> tupl
async def _capture_event(event: Dict[str, Any], priority: int = 20):
events.append(event)
async def _noop_speak(_text: str, fade_in_ms: int = 0, fade_out_ms: int = 8):
async def _noop_speak(
_text: str,
fade_in_ms: int = 0,
fade_out_ms: int = 8,
**_kwargs,
):
return None
monkeypatch.setattr(pipeline, "_send_event", _capture_event)
@@ -362,6 +368,89 @@ async def test_duplicate_tool_results_are_ignored(monkeypatch):
assert result.get("output", {}).get("value") == 1
@pytest.mark.asyncio
async def test_presence_probe_emits_contextual_direct_prompt(monkeypatch):
    """An idle-triggered probe should re-ask the pending question aloud, without tool calls.

    With ``includeContext`` enabled, the probe text is expected to echo the
    assistant's last open request (the order-number question).
    """
    pipeline, events = _build_pipeline(monkeypatch, [[LLMStreamEvent(type="done")]])
    overrides = {
        "presenceProbe": {
            "enabled": True,
            "idleSeconds": 5,
            "cooldownSeconds": 5,
            "maxPrompts": 2,
            "includeContext": True,
        }
    }
    pipeline.apply_runtime_overrides(overrides)
    # Stop the background probe loop so the probe can be driven manually.
    await pipeline._shutdown_presence_probe_task()
    await pipeline.conversation.add_assistant_turn("请把你的订单号告诉我,我继续帮你处理。")
    # Backdate last user activity (8s ago) beyond the 5s idle threshold.
    pipeline._last_user_activity_ms = (time.monotonic() * 1000.0) - 8000.0

    did_fire = await pipeline._run_presence_probe_once()

    assert did_fire is True
    finals = [evt for evt in events if evt.get("type") == "assistant.response.final"]
    assert finals
    # The contextual probe must carry the pending order-number question.
    assert "订单号" in str(finals[-1].get("text") or "")
    seen_types = {evt.get("type") for evt in events}
    assert "output.audio.start" in seen_types
    assert "assistant.tool_call" not in seen_types
@pytest.mark.asyncio
async def test_presence_probe_respects_max_prompts_limit(monkeypatch):
    """After ``maxPrompts`` is exhausted, further probe attempts must be no-ops."""
    pipeline, events = _build_pipeline(monkeypatch, [[LLMStreamEvent(type="done")]])
    pipeline.apply_runtime_overrides(
        {
            "presenceProbe": {
                "enabled": True,
                "idleSeconds": 5,
                "cooldownSeconds": 5,
                "maxPrompts": 1,
                "waitForResponse": False,
            }
        }
    )
    # Drive the probe by hand instead of via the background task.
    await pipeline._shutdown_presence_probe_task()
    await pipeline.conversation.add_assistant_turn("我们继续。")
    pipeline._last_user_activity_ms = (time.monotonic() * 1000.0) - 8000.0

    first = await pipeline._run_presence_probe_once()
    # Even well past the cooldown window, the single-prompt budget is spent.
    far_future_ms = (time.monotonic() * 1000.0) + 10000.0
    second = await pipeline._run_presence_probe_once(now_ms=far_future_ms)

    assert first is True
    assert second is False
    final_count = sum(
        1 for evt in events if evt.get("type") == "assistant.response.final"
    )
    assert final_count == 1
@pytest.mark.asyncio
async def test_presence_probe_text_mode_emits_text_only(monkeypatch):
    """In text output mode a probe emits a final text event but no audio or tool calls."""
    pipeline, events = _build_pipeline(monkeypatch, [[LLMStreamEvent(type="done")]])
    pipeline.apply_runtime_overrides(
        {
            "output": {"mode": "text"},
            "presenceProbe": {
                "enabled": True,
                "idleSeconds": 5,
                "cooldownSeconds": 5,
                "maxPrompts": 1,
                "waitForResponse": False,
            },
        }
    )
    # Kill the background loop so a single probe run can be invoked directly.
    await pipeline._shutdown_presence_probe_task()
    await pipeline.conversation.add_assistant_turn("我们继续。")
    # 8s of silence exceeds the configured 5s idle threshold.
    pipeline._last_user_activity_ms = (time.monotonic() * 1000.0) - 8000.0

    did_fire = await pipeline._run_presence_probe_once()

    assert did_fire is True
    emitted = {evt.get("type") for evt in events}
    assert "assistant.response.final" in emitted
    assert "assistant.tool_call" not in emitted
    assert "output.audio.start" not in emitted
@pytest.mark.asyncio
async def test_server_calculator_emits_tool_result(monkeypatch):
pipeline, events = _build_pipeline(