Fix FastGPT client-tool three-round loop bugs
This commit is contained in:
@@ -73,6 +73,8 @@ class DuplexPipeline:
|
||||
# Minimum number of spoken characters before a sentence may be split for
# output — presumably a TTS chunking threshold; confirm against the splitter.
_MIN_SPLIT_SPOKEN_CHARS = 6

# Fallback wait (seconds) for a client-side tool call to deliver its result;
# used as the deadline when the caller supplies no positive timeout.
_TOOL_WAIT_TIMEOUT_SECONDS = 60.0

# Timeout (seconds) for server-executed tools — NOTE(review): not referenced
# in the visible code; verify where it is applied.
_SERVER_TOOL_TIMEOUT_SECONDS = 15.0

# Cap on locally-driven LLM tool-planning rounds per assistant turn; the
# turn is ended early with a warning once this is exceeded.
_MAX_LLM_ROUNDS = 3

# Cap on provider-managed tool-chain rounds per turn (the provider keeps
# returning continuation streams); the turn is ended early past this.
_MAX_PROVIDER_MANAGED_ROUNDS = 24

# Logical track identifiers for the duplex transport.
TRACK_AUDIO_IN = "audio_in"
TRACK_AUDIO_OUT = "audio_out"
TRACK_CONTROL = "control"
|
||||
@@ -408,6 +410,7 @@ class DuplexPipeline:
|
||||
self._runtime_tool_display_names: Dict[str, str] = {}
|
||||
self._runtime_tool_wait_for_response: Dict[str, bool] = {}
|
||||
self._pending_tool_waiters: Dict[str, asyncio.Future] = {}
|
||||
self._pending_tool_deadlines: Dict[str, float] = {}
|
||||
self._early_tool_results: Dict[str, Dict[str, Any]] = {}
|
||||
self._completed_tool_call_ids: set[str] = set()
|
||||
self._pending_client_tool_call_ids: set[str] = set()
|
||||
@@ -2236,6 +2239,7 @@ class DuplexPipeline:
|
||||
future = loop.create_future()
|
||||
self._pending_tool_waiters[call_id] = future
|
||||
timeout = timeout_seconds if isinstance(timeout_seconds, (int, float)) and timeout_seconds > 0 else self._TOOL_WAIT_TIMEOUT_SECONDS
|
||||
self._pending_tool_deadlines[call_id] = time.monotonic() + timeout
|
||||
try:
|
||||
return await asyncio.wait_for(future, timeout=timeout)
|
||||
except asyncio.TimeoutError:
|
||||
@@ -2247,8 +2251,14 @@ class DuplexPipeline:
|
||||
}
|
||||
finally:
|
||||
self._pending_tool_waiters.pop(call_id, None)
|
||||
self._pending_tool_deadlines.pop(call_id, None)
|
||||
self._pending_client_tool_call_ids.discard(call_id)
|
||||
|
||||
def pending_client_tool_deadline(self) -> Optional[float]:
    """Return the latest monotonic deadline among pending client tool calls.

    Deadlines are recorded in ``self._pending_tool_deadlines`` (call_id ->
    ``time.monotonic()`` + timeout) while a tool-result future is awaited.

    Returns:
        The maximum pending deadline, or ``None`` when no client tool
        calls are currently awaiting a response.
    """
    # max(..., default=None) collapses the explicit empty-dict check and
    # the max() call into one expression with identical behavior.
    return max(self._pending_tool_deadlines.values(), default=None)
|
||||
|
||||
def _normalize_stream_event(self, item: Any) -> LLMStreamEvent:
|
||||
if isinstance(item, LLMStreamEvent):
|
||||
return item
|
||||
@@ -2289,7 +2299,8 @@ class DuplexPipeline:
|
||||
messages = self.conversation.get_messages()
|
||||
if system_context and system_context.strip():
|
||||
messages = [*messages, LLMMessage(role="system", content=system_context.strip())]
|
||||
max_rounds = 3
|
||||
llm_rounds = 0
|
||||
provider_rounds_remaining = self._MAX_PROVIDER_MANAGED_ROUNDS
|
||||
|
||||
await self.conversation.start_assistant_turn()
|
||||
self._is_bot_speaking = True
|
||||
@@ -2300,10 +2311,27 @@ class DuplexPipeline:
|
||||
self._pending_llm_delta = ""
|
||||
self._last_llm_delta_emit_ms = 0.0
|
||||
pending_provider_stream = None
|
||||
for _ in range(max_rounds):
|
||||
while True:
|
||||
if self._interrupt_event.is_set():
|
||||
break
|
||||
|
||||
if pending_provider_stream is not None:
|
||||
if provider_rounds_remaining <= 0:
|
||||
logger.warning(
|
||||
"Provider-managed tool chain exceeded {} rounds; ending turn early",
|
||||
self._MAX_PROVIDER_MANAGED_ROUNDS,
|
||||
)
|
||||
break
|
||||
provider_rounds_remaining -= 1
|
||||
else:
|
||||
if llm_rounds >= self._MAX_LLM_ROUNDS:
|
||||
logger.warning(
|
||||
"LLM tool planning exceeded {} rounds; ending turn early",
|
||||
self._MAX_LLM_ROUNDS,
|
||||
)
|
||||
break
|
||||
llm_rounds += 1
|
||||
|
||||
sentence_buffer = ""
|
||||
pending_punctuation = ""
|
||||
round_response = ""
|
||||
|
||||
Reference in New Issue
Block a user