diff --git a/engine/core/duplex_pipeline.py b/engine/core/duplex_pipeline.py
index 6359011..0a391e7 100644
--- a/engine/core/duplex_pipeline.py
+++ b/engine/core/duplex_pipeline.py
@@ -382,6 +382,7 @@ class DuplexPipeline:
         self._last_llm_delta_emit_ms: float = 0.0
         now_ms = time.monotonic() * 1000.0
         self._last_user_activity_ms: float = now_ms
+        self._last_assistant_activity_ms: float = now_ms
         self._last_presence_probe_ms: float = 0.0
         self._presence_probe_attempts: int = 0
         self._presence_probe_seq: int = 0
@@ -947,6 +948,9 @@ class DuplexPipeline:
         self._last_user_activity_ms = time.monotonic() * 1000.0
         self._active_presence_probe_call_id = None
 
+    def _touch_assistant_activity(self) -> None:  # Stamp "now" as the most recent assistant activity.
+        self._last_assistant_activity_ms = time.monotonic() * 1000.0  # monotonic clock, seconds -> milliseconds
+
     def _presence_probe_in_progress(self) -> bool:
         return bool(self._active_presence_probe_call_id)
 
@@ -970,7 +974,8 @@ class DuplexPipeline:
             return False
         if self.conversation.turn_count <= 0:
             return False
-        if now_ms - self._last_user_activity_ms < self._presence_probe_idle_ms():
+        last_activity_ms = max(self._last_user_activity_ms, self._last_assistant_activity_ms)
+        if now_ms - last_activity_ms < self._presence_probe_idle_ms():
             return False
         if self._last_presence_probe_ms > 0.0 and now_ms - self._last_presence_probe_ms < self._presence_probe_cooldown_ms():
             return False
@@ -984,12 +989,52 @@ class DuplexPipeline:
             return compact
         return f"{compact[:self._PRESENCE_PROBE_CONTEXT_CHARS]}..."
 
-    def _build_presence_probe_question(self) -> str:
+    async def _build_presence_probe_question(self) -> str:
         manual_question = str(self._runtime_presence_probe.get("question") or "").strip()
         if manual_question:
             return manual_question
 
         include_context = bool(self._runtime_presence_probe.get("includeContext", True))
+        if include_context and self.llm_service:
+            last_user = self._clip_presence_context(self.conversation.last_user_text or "")
+            last_assistant = self._clip_presence_context(self.conversation.last_assistant_text or "")
+            context_lines: List[str] = []
+            if last_user:
+                context_lines.append(f"用户：{last_user}")
+            if last_assistant:
+                context_lines.append(f"助手：{last_assistant}")
+            context_blob = "\n".join(context_lines).strip()
+            try:
+                generated = await self.llm_service.generate(
+                    [
+                        LLMMessage(
+                            role="system",
+                            content=(
+                                "你是语音助手。目标是在用户长时间静默时发起一次自然、礼貌、简短的在线确认。"
+                                "请只输出一句中文问句，不要解释，不要使用引号，不要使用 markdown。"
+                                "优先沿用最近上下文，不要机械重复固定模板。"
+                            ),
+                        ),
+                        LLMMessage(
+                            role="user",
+                            content=(
+                                "请基于以下最近对话，生成一句在线确认问句（不超过22个汉字）。\n"
+                                f"{context_blob or '（无明显上下文）'}"
+                            ),
+                        ),
+                    ],
+                    temperature=0.7,
+                    max_tokens=64,
+                )
+                cleaned = str(generated or "").strip().strip('"').strip("'")
+                if cleaned:
+                    cleaned = cleaned.replace("\n", " ").strip()
+                    if len(cleaned) > 60:
+                        cleaned = cleaned[:60]
+                    return cleaned
+            except Exception as exc:
+                logger.warning(f"Presence probe LLM question generation failed: {exc}")
+
         if include_context:
             last_assistant = self._clip_presence_context(self.conversation.last_assistant_text or "")
             if last_assistant:
@@ -1010,10 +1055,11 @@ class DuplexPipeline:
         self._last_presence_probe_ms = current_ms
         self._presence_probe_attempts += 1
 
-        question = self._build_presence_probe_question()
+        question = await self._build_presence_probe_question()
         probe_turn_id = self._start_turn()
         probe_response_id = self._start_response()
         try:
+            await self.conversation.add_assistant_turn(question)
             await self._send_event(
                 {
                     **ev(
@@ -1028,11 +1074,12 @@ class DuplexPipeline:
             )
             if self._tts_output_enabled():
                 await self._speak(question, audio_event_priority=30)
+            self._touch_assistant_activity()
 
             logger.info(
                 "[PresenceProbe] sent probe_id={} idle_ms={} question={}",
                 probe_id,
-                int(max(0.0, current_ms - self._last_user_activity_ms)),
+                int(max(0.0, current_ms - max(self._last_user_activity_ms, self._last_assistant_activity_ms))),
                 question,
             )
             return True
@@ -1300,6 +1347,7 @@ class DuplexPipeline:
         if self._tts_output_enabled() and not used_preloaded_audio:
             # Keep opener text ahead of opener voice start.
             await self._speak(greeting_to_speak, audio_event_priority=30)
+        self._touch_assistant_activity()
 
     async def _play_preloaded_opener_audio(self) -> bool:
         """
@@ -2564,6 +2612,7 @@ class DuplexPipeline:
             self._barge_in_silence_frames = 0
             self._current_response_id = None
             self._current_tts_id = None
+            self._touch_assistant_activity()
 
     async def _speak_sentence(
         self,
diff --git a/engine/tests/test_tool_call_flow.py b/engine/tests/test_tool_call_flow.py
index c691c39..8b1abb3 100644
--- a/engine/tests/test_tool_call_flow.py
+++ b/engine/tests/test_tool_call_flow.py
@@ -62,6 +62,12 @@ class _FakeLLM:
         self._rounds = rounds
         self._call_index = 0
 
+    async def generate(self, messages, temperature=0.7, max_tokens=None):  # Canned one-shot reply; temperature/max_tokens are accepted but unused.
+        prompt = " ".join([str(getattr(m, "content", "")) for m in messages])  # Flatten every message's content into one searchable string.
+        if "订单号" in prompt:  # The prompt mentions an order number -> return a question that echoes it.
+            return "关于订单号这块，你还在线吗？"
+        return "你还在线吗？"  # Generic fallback question when no order-number context is present.
+
     async def generate_stream(self, _messages, temperature=0.7, max_tokens=None):
         idx = self._call_index
         self._call_index += 1
@@ -384,7 +390,9 @@ async def test_presence_probe_emits_contextual_direct_prompt(monkeypatch):
     )
     await pipeline._shutdown_presence_probe_task()
     await pipeline.conversation.add_assistant_turn("请把你的订单号告诉我，我继续帮你处理。")
-    pipeline._last_user_activity_ms = (time.monotonic() * 1000.0) - 8000.0
+    now_ms = time.monotonic() * 1000.0
+    pipeline._last_user_activity_ms = now_ms - 8000.0
+    pipeline._last_assistant_activity_ms = now_ms - 8000.0
 
     fired = await pipeline._run_presence_probe_once()
 
@@ -392,6 +400,7 @@ async def test_presence_probe_emits_contextual_direct_prompt(monkeypatch):
     probe_text_events = [e for e in events if e.get("type") == "assistant.response.final"]
     assert probe_text_events
     assert "订单号" in str(probe_text_events[-1].get("text") or "")
+    assert "订单号" in str(pipeline.conversation.last_assistant_text or "")
     assert any(e.get("type") == "output.audio.start" for e in events)
     assert not any(e.get("type") == "assistant.tool_call" for e in events)
 
@@ -412,7 +421,9 @@ async def test_presence_probe_respects_max_prompts_limit(monkeypatch):
     )
     await pipeline._shutdown_presence_probe_task()
     await pipeline.conversation.add_assistant_turn("我们继续。")
-    pipeline._last_user_activity_ms = (time.monotonic() * 1000.0) - 8000.0
+    now_ms = time.monotonic() * 1000.0
+    pipeline._last_user_activity_ms = now_ms - 8000.0
+    pipeline._last_assistant_activity_ms = now_ms - 8000.0
 
     first_fired = await pipeline._run_presence_probe_once()
     second_fired = await pipeline._run_presence_probe_once(
@@ -441,7 +452,9 @@ async def test_presence_probe_text_mode_emits_text_only(monkeypatch):
     )
     await pipeline._shutdown_presence_probe_task()
     await pipeline.conversation.add_assistant_turn("我们继续。")
-    pipeline._last_user_activity_ms = (time.monotonic() * 1000.0) - 8000.0
+    now_ms = time.monotonic() * 1000.0
+    pipeline._last_user_activity_ms = now_ms - 8000.0
+    pipeline._last_assistant_activity_ms = now_ms - 8000.0
 
     fired = await pipeline._run_presence_probe_once()
 
@@ -451,6 +464,28 @@ async def test_presence_probe_text_mode_emits_text_only(monkeypatch):
     assert not any(e.get("type") == "output.audio.start" for e in events)
 
 
+@pytest.mark.asyncio
+async def test_presence_probe_does_not_count_assistant_speaking_time_as_idle(monkeypatch):  # Recent assistant activity must keep the probe from being due.
+    pipeline, _events = _build_pipeline(monkeypatch, [[LLMStreamEvent(type="done")]])  # Emitted events are not inspected in this test.
+    pipeline.apply_runtime_overrides(
+        {
+            "presenceProbe": {
+                "enabled": True,
+                "idleSeconds": 10,  # Idle threshold; the timestamps below sit on either side of it.
+                "cooldownSeconds": 5,
+                "maxPrompts": 1,
+            }
+        }
+    )
+    await pipeline._shutdown_presence_probe_task()  # NOTE(review): presumably stops the background probe loop so the check is driven manually - confirm.
+    await pipeline.conversation.add_assistant_turn("我们继续。")  # NOTE(review): presumably makes turn_count > 0, which the due-check requires - confirm.
+    now_ms = time.monotonic() * 1000.0  # Single reference instant shared by both timestamps and the due-check.
+    pipeline._last_user_activity_ms = now_ms - 30_000.0  # User silent for 30 s: past the 10 s threshold on its own.
+    pipeline._last_assistant_activity_ms = now_ms - 2_000.0  # Assistant active 2 s ago: inside the 10 s threshold.
+
+    assert pipeline._presence_probe_due(now_ms) is False  # Idle is measured from the later of the two timestamps, so the probe is not due.
+
+
 @pytest.mark.asyncio
 async def test_server_calculator_emits_tool_result(monkeypatch):
     pipeline, events = _build_pipeline(
diff --git a/web/pages/Assistants.tsx b/web/pages/Assistants.tsx
index 3b36944..21d6853 100644
--- a/web/pages/Assistants.tsx
+++ b/web/pages/Assistants.tsx
@@ -3815,8 +3815,15 @@ export const DebugDrawer: React.FC<{
             if (!finalText) return prev;
             const last = prev[prev.length - 1];
             if (last?.role === 'model') {
-              if (last.text === finalText) return prev;
-              if (finalText.startsWith(last.text) || last.text.startsWith(finalText)) {
+              const sameResponse = Boolean(
+                responseId
+                && last.responseId
+                && responseId === last.responseId
+              );
+              const bothWithoutResponseId = !responseId && !last.responseId;
+              const canMergeTail = sameResponse || bothWithoutResponseId;
+              if (canMergeTail && last.text === finalText) return prev;
+              if (canMergeTail && (finalText.startsWith(last.text) || last.text.startsWith(finalText))) {
                 const next = [...prev];
                 next[next.length - 1] = { ...last, text: finalText };
                 return next;