Add fastgpt as separate assistant mode

This commit is contained in:
Xin Wang
2026-03-11 08:37:34 +08:00
parent 13684d498b
commit f3612a710d
26 changed files with 2333 additions and 210 deletions


@@ -594,6 +594,7 @@ class DuplexPipeline:
                "provider": llm_provider,
                "model": str(self._runtime_llm.get("model") or settings.llm_model),
                "baseUrl": llm_base_url,
                "appId": str(self._runtime_llm.get("appId") or ""),
            },
            "asr": {
                "provider": asr_provider,
@@ -937,6 +938,19 @@ class DuplexPipeline:
            return None
        return text.strip().strip('"').strip("'")

    async def _resolve_provider_initial_greeting(self) -> Optional[str]:
        if not self.llm_service or not hasattr(self.llm_service, "get_initial_greeting"):
            return None
        try:
            greeting = await self.llm_service.get_initial_greeting()
        except Exception as exc:
            logger.warning("Failed to load provider initial greeting: {}", exc)
            return None
        text = str(greeting or "").strip()
        return text or None

    async def start(self) -> None:
        """Start the pipeline and connect services."""
        try:
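The pipeline only probes for get_initial_greeting via hasattr, so the provider side is free to implement it however it likes. A hedged sketch of a FastGPT-backed counterpart, assuming FastGPT exposes the app's configured welcome text through an init-style endpoint; the URL path and response shape below are assumptions, not the repository's actual service code:

    import aiohttp
    from typing import Optional

    class FastGPTService:
        def __init__(self, base_url: str, api_key: str, app_id: str) -> None:
            self._base_url = base_url.rstrip("/")
            self._api_key = api_key
            self._app_id = app_id

        async def get_initial_greeting(self) -> Optional[str]:
            # Hypothetical endpoint; adjust to the deployment's actual API.
            url = f"{self._base_url}/core/chat/init?appId={self._app_id}"
            headers = {"Authorization": f"Bearer {self._api_key}"}
            async with aiohttp.ClientSession() as session:
                async with session.get(url, headers=headers) as resp:
                    resp.raise_for_status()
                    data = await resp.json()
            # The welcomeText location is an assumption about the payload shape.
            return (data.get("data") or {}).get("app", {}).get("chatConfig", {}).get("welcomeText")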
@@ -956,6 +970,7 @@ class DuplexPipeline:
                model=str(llm_model),
                api_key=str(llm_api_key).strip() if llm_api_key else None,
                base_url=str(llm_base_url).strip() if llm_base_url else None,
                app_id=str(self._runtime_llm.get("appId")).strip() if self._runtime_llm.get("appId") else None,
                system_prompt=self.conversation.system_prompt,
                temperature=settings.llm_temperature,
                knowledge_config=self._resolved_knowledge_config(),
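The new app_id keyword only matters to the FastGPT backend; other providers can ignore it. A hedged sketch of how construction might branch on provider (the factory, the guard, and both class stubs are illustrative, not the repository's actual wiring):

    class FastGPTService:  # see the fuller sketch above
        def __init__(self, base_url: str, api_key: str, app_id: str) -> None: ...

    class OpenAICompatibleService:  # hypothetical default backend
        def __init__(self, **kwargs) -> None: ...

    def create_llm_service(provider: str, *, model: str, api_key: str | None = None,
                           base_url: str | None = None, app_id: str | None = None, **kwargs):
        # Hypothetical factory: FastGPT requires an app id; others ignore the keyword.
        if provider == "fastgpt":
            if not app_id:
                raise ValueError("fastgpt provider requires an appId")
            return FastGPTService(base_url=base_url or "", api_key=api_key or "", app_id=app_id)
        return OpenAICompatibleService(model=model, api_key=api_key, base_url=base_url, **kwargs)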
@@ -1096,7 +1111,11 @@ class DuplexPipeline:
        if not self._bot_starts_first():
            return
        if self._generated_opener_enabled() and self._resolved_tool_schemas():
        provider_greeting = await self._resolve_provider_initial_greeting()
        if provider_greeting:
            self.conversation.greeting = provider_greeting
        if not provider_greeting and self._generated_opener_enabled() and self._resolved_tool_schemas():
            # Run generated opener as a normal tool-capable assistant turn.
            # Use an empty user input so the opener can be driven by system prompt policy.
            if self._current_turn_task and not self._current_turn_task.done():
@@ -1107,13 +1126,13 @@ class DuplexPipeline:
            return
        manual_opener_execution: Dict[str, List[Dict[str, Any]]] = {"toolCalls": [], "toolResults": []}
        if not self._generated_opener_enabled() and self._resolved_manual_opener_tool_calls():
        if not provider_greeting and not self._generated_opener_enabled() and self._resolved_manual_opener_tool_calls():
            self._start_turn()
            self._start_response()
            manual_opener_execution = await self._execute_manual_opener_tool_calls()
        greeting_to_speak = self.conversation.greeting
        if self._generated_opener_enabled():
        if not provider_greeting and self._generated_opener_enabled():
            generated_greeting = await self._generate_runtime_greeting()
            if generated_greeting:
                greeting_to_speak = generated_greeting
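Taken together, the opener now resolves in a fixed order: a provider greeting (FastGPT's configured welcome text) wins outright and suppresses both the generated opener and the manual opener tool calls; otherwise the previous behavior applies. A condensed, illustrative restatement of that precedence:

    async def resolve_opener(pipeline) -> str:
        # Illustrative only; mirrors the precedence in the two hunks above.
        provider_greeting = await pipeline._resolve_provider_initial_greeting()
        if provider_greeting:
            return provider_greeting  # FastGPT welcome text short-circuits everything
        if pipeline._generated_opener_enabled():
            generated = await pipeline._generate_runtime_greeting()
            if generated:
                return generated
        return pipeline.conversation.greeting  # static fallback (after manual opener tools, if any)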
@@ -1954,12 +1973,35 @@ class DuplexPipeline:
        return bool(self._runtime_tool_wait_for_response.get(normalized, False))

    def _tool_executor(self, tool_call: Dict[str, Any]) -> str:
        explicit_executor = str(tool_call.get("executor") or "").strip().lower()
        if explicit_executor in {"client", "server"}:
            return explicit_executor
        name = self._tool_name(tool_call)
        if name and name in self._runtime_tool_executor:
            return self._runtime_tool_executor[name]
        # Default to server execution unless explicitly marked as client.
        return "server"

    def _tool_wait_for_response_for_call(self, tool_name: str, tool_call: Dict[str, Any]) -> bool:
        explicit_wait = tool_call.get("wait_for_response")
        if explicit_wait is None:
            explicit_wait = tool_call.get("waitForResponse")
        if isinstance(explicit_wait, bool):
            return explicit_wait
        return self._tool_wait_for_response(tool_name)

    def _tool_timeout_ms(self, tool_call: Dict[str, Any]) -> int:
        raw_timeout = tool_call.get("timeout_ms")
        if raw_timeout is None:
            raw_timeout = tool_call.get("timeoutMs")
        try:
            timeout_ms = int(raw_timeout)
        except (TypeError, ValueError):
            timeout_ms = 0
        if timeout_ms > 0:
            return timeout_ms
        return int(self._TOOL_WAIT_TIMEOUT_SECONDS * 1000)

    def _tool_arguments(self, tool_call: Dict[str, Any]) -> Dict[str, Any]:
        fn = tool_call.get("function")
        if not isinstance(fn, dict):
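These three helpers let each streamed tool call override the runtime defaults, checking the explicit per-call field first and falling back to the per-tool configuration. A minimal illustration of the precedence (all values made up):

    call = {
        "id": "call_1",
        "function": {"name": "open_form", "arguments": "{}"},
        "executor": "client",    # explicit executor beats the runtime executor map
        "waitForResponse": True, # per-call flag beats the per-tool default
        "timeoutMs": 15000,      # per-call timeout beats _TOOL_WAIT_TIMEOUT_SECONDS
    }
    # pipeline._tool_executor(call)                          -> "client"
    # pipeline._tool_wait_for_response_for_call("open_form", call) -> True
    # pipeline._tool_timeout_ms(call)                        -> 15000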
@@ -2179,7 +2221,7 @@ class DuplexPipeline:
            self._early_tool_results[call_id] = item
            self._completed_tool_call_ids.add(call_id)

    async def _wait_for_single_tool_result(self, call_id: str) -> Dict[str, Any]:
    async def _wait_for_single_tool_result(self, call_id: str, timeout_seconds: Optional[float] = None) -> Dict[str, Any]:
        if call_id in self._completed_tool_call_ids and call_id not in self._early_tool_results:
            return {
                "tool_call_id": call_id,
@@ -2193,8 +2235,9 @@ class DuplexPipeline:
        loop = asyncio.get_running_loop()
        future = loop.create_future()
        self._pending_tool_waiters[call_id] = future
        timeout = timeout_seconds if isinstance(timeout_seconds, (int, float)) and timeout_seconds > 0 else self._TOOL_WAIT_TIMEOUT_SECONDS
        try:
            return await asyncio.wait_for(future, timeout=self._TOOL_WAIT_TIMEOUT_SECONDS)
            return await asyncio.wait_for(future, timeout=timeout)
        except asyncio.TimeoutError:
            self._completed_tool_call_ids.add(call_id)
            return {
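The waiter is a plain asyncio future keyed by call id, now with a per-call timeout. A self-contained sketch of the same pattern, stripped of the pipeline's bookkeeping (the timeout result's status shape is an assumption):

    import asyncio
    from typing import Any, Dict, Optional

    class ToolWaiter:
        def __init__(self, default_timeout: float = 30.0) -> None:
            self._default_timeout = default_timeout
            self._pending: Dict[str, asyncio.Future] = {}

        async def wait(self, call_id: str, timeout_seconds: Optional[float] = None) -> Dict[str, Any]:
            future: asyncio.Future = asyncio.get_running_loop().create_future()
            self._pending[call_id] = future
            timeout = timeout_seconds if isinstance(timeout_seconds, (int, float)) and timeout_seconds > 0 else self._default_timeout
            try:
                return await asyncio.wait_for(future, timeout=timeout)
            except asyncio.TimeoutError:
                return {"tool_call_id": call_id, "status": {"code": 408}}  # assumed timeout shape
            finally:
                self._pending.pop(call_id, None)

        def resolve(self, call_id: str, result: Dict[str, Any]) -> None:
            # Called when the client posts its tool result back.
            future = self._pending.get(call_id)
            if future and not future.done():
                future.set_result(result)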
@@ -2256,6 +2299,7 @@ class DuplexPipeline:
        first_audio_sent = False
        self._pending_llm_delta = ""
        self._last_llm_delta_emit_ms = 0.0
        pending_provider_stream = None
        for _ in range(max_rounds):
            if self._interrupt_event.is_set():
                break
@@ -2267,7 +2311,10 @@ class DuplexPipeline:
            allow_text_output = True
            use_engine_sentence_split = self._use_engine_sentence_split_for_tts()
            async for raw_event in self.llm_service.generate_stream(messages):
            stream_iter = pending_provider_stream if pending_provider_stream is not None else self.llm_service.generate_stream(messages)
            pending_provider_stream = None
            async for raw_event in stream_iter:
                if self._interrupt_event.is_set():
                    break
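The key mechanic here: a pending resume stream, when set, replaces exactly one generate_stream(messages) call and is then cleared, so the FastGPT continuation is consumed on the very next round. An illustrative reduction of the loop (run_rounds and the print stand-in are for illustration only):

    from typing import Any, AsyncIterator, Dict, List, Optional

    async def run_rounds(llm, messages: List[Dict[str, Any]], max_rounds: int = 4) -> None:
        pending_provider_stream: Optional[AsyncIterator[Dict[str, Any]]] = None
        for _ in range(max_rounds):
            stream = pending_provider_stream if pending_provider_stream is not None else llm.generate_stream(messages)
            pending_provider_stream = None
            async for event in stream:
                print(event)  # stand-in for the pipeline's event handling
            # ...tool execution may set pending_provider_stream for the next round...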
@@ -2282,14 +2329,21 @@ class DuplexPipeline:
                    if not tool_call:
                        continue
                    allow_text_output = False
                    tool_name = self._tool_name(tool_call) or "unknown_tool"
                    executor = self._tool_executor(tool_call)
                    enriched_tool_call = dict(tool_call)
                    enriched_tool_call["executor"] = executor
                    tool_name = self._tool_name(enriched_tool_call) or "unknown_tool"
                    tool_id = self._tool_id_for_name(tool_name)
                    tool_display_name = self._tool_display_name(tool_name) or tool_name
                    wait_for_response = self._tool_wait_for_response(tool_name)
                    tool_display_name = str(
                        enriched_tool_call.get("displayName")
                        or enriched_tool_call.get("display_name")
                        or self._tool_display_name(tool_name)
                        or tool_name
                    ).strip()
                    wait_for_response = self._tool_wait_for_response_for_call(tool_name, enriched_tool_call)
                    enriched_tool_call["wait_for_response"] = wait_for_response
                    timeout_ms = self._tool_timeout_ms(enriched_tool_call)
                    enriched_tool_call["timeout_ms"] = timeout_ms
                    call_id = str(enriched_tool_call.get("id") or "").strip()
                    fn_payload = (
                        dict(enriched_tool_call.get("function"))
@@ -2298,6 +2352,15 @@ class DuplexPipeline:
                    )
                    raw_args = str(fn_payload.get("arguments") or "") if isinstance(fn_payload, dict) else ""
                    tool_arguments = self._tool_arguments(enriched_tool_call)
                    if tool_name == "fastgpt.interactive":
                        context_payload = (
                            dict(tool_arguments.get("context"))
                            if isinstance(tool_arguments.get("context"), dict)
                            else {}
                        )
                        context_payload.setdefault("turn_id", turn_id)
                        context_payload.setdefault("response_id", response_id)
                        tool_arguments["context"] = context_payload
                    merged_tool_arguments = self._apply_tool_default_args(tool_name, tool_arguments)
                    try:
                        merged_args_text = json.dumps(merged_tool_arguments, ensure_ascii=False)
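The special-cased fastgpt.interactive tool is how a FastGPT workflow's interactive node (for example a user-select or form-input step) surfaces to the client; stamping turn_id and response_id into the context lets the client's answer be correlated back to this exact turn. An illustrative arguments payload after injection (the type and params fields are assumptions about FastGPT's interactive schema, the ids are made up):

    tool_arguments = {
        "type": "userSelect",                  # assumed FastGPT interactive-node kind
        "params": {"options": ["Yes", "No"]},  # made-up options
        "context": {
            "turn_id": "turn-42",              # stamped by the pipeline when absent
            "response_id": "resp-7",
        },
    }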
@@ -2324,9 +2387,9 @@ class DuplexPipeline:
                            tool_id=tool_id,
                            tool_display_name=tool_display_name,
                            wait_for_response=wait_for_response,
                            arguments=tool_arguments,
                            arguments=merged_tool_arguments,
                            executor=executor,
                            timeout_ms=int(self._TOOL_WAIT_TIMEOUT_SECONDS * 1000),
                            timeout_ms=timeout_ms,
                            tool_call=enriched_tool_call,
                        )
                    },
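For client implementers: the emitted tool-call event now carries the effective executor and the per-call timeout alongside the merged (defaults-applied) arguments. An illustrative payload as the client might receive it; the field spelling is inferred from the keyword arguments above and the exact wire envelope is an assumption:

    tool_call_event = {
        "name": "open_form",                 # made-up tool
        "toolId": "tool-1",
        "toolDisplayName": "Open Form",
        "waitForResponse": True,
        "arguments": {"fields": ["email"]},  # defaults already merged in
        "executor": "client",
        "timeoutMs": 15000,                  # per-call override, else the pipeline default
    }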
@@ -2457,6 +2520,8 @@ class DuplexPipeline:
                break
            tool_results: List[Dict[str, Any]] = []
            provider_managed_tool = False
            provider_resumed = False
            for call in tool_calls:
                call_id = str(call.get("id") or "").strip()
                if not call_id:
@@ -2466,9 +2531,27 @@ class DuplexPipeline:
                tool_id = self._tool_id_for_name(tool_name)
                logger.info(f"[Tool] execute start name={tool_name} call_id={call_id} executor={executor}")
                if executor == "client":
                    result = await self._wait_for_single_tool_result(call_id)
                    timeout_ms = self._tool_timeout_ms(call)
                    result = await self._wait_for_single_tool_result(
                        call_id,
                        timeout_seconds=(timeout_ms / 1000.0),
                    )
                    await self._emit_tool_result(result, source="client")
                    tool_results.append(result)
                    if (
                        hasattr(self.llm_service, "handles_client_tool")
                        and hasattr(self.llm_service, "resume_after_client_tool_result")
                        and self.llm_service.handles_client_tool(tool_name)
                    ):
                        provider_managed_tool = True
                        status = result.get("status") if isinstance(result.get("status"), dict) else {}
                        status_code = int(status.get("code") or 0) if status else 0
                        output = result.get("output") if isinstance(result.get("output"), dict) else {}
                        action = str(output.get("action") or "").strip().lower()
                        if 200 <= status_code < 300 and action != "cancel":
                            pending_provider_stream = self.llm_service.resume_after_client_tool_result(call_id, result)
                            provider_resumed = True
                        break
                    continue
                call_for_executor = dict(call)
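The hasattr probes above define an informal, duck-typed provider protocol: a service that owns one of its client tools implements both methods, and resume_after_client_tool_result returns the continuation stream synchronously (note the call is not awaited). A sketch of a FastGPT-style implementation; the continuation mechanics and placeholder event are assumptions:

    from typing import Any, AsyncIterator, Dict

    class FastGPTChatService:
        """Sketch of the duck-typed protocol probed via hasattr() above."""

        def handles_client_tool(self, tool_name: str) -> bool:
            # Only FastGPT's own interactive-node tool is provider-managed.
            return tool_name == "fastgpt.interactive"

        def resume_after_client_tool_result(
            self, call_id: str, result: Dict[str, Any]
        ) -> AsyncIterator[Dict[str, Any]]:
            # Feed the user's interactive answer back into the FastGPT chat and
            # hand the pipeline the continuation stream; how the answer is
            # serialized to FastGPT is an assumption.
            output = result.get("output") if isinstance(result.get("output"), dict) else {}
            return self._continue_chat(call_id, output)

        async def _continue_chat(self, call_id: str, output: Dict[str, Any]):
            yield {"type": "text_delta", "text": ""}  # placeholder continuation event

A successful, non-cancelled result swaps in the resume stream (provider_resumed) for the next round; a failure or an explicit cancel leaves only provider_managed_tool set, so, as the final hunk shows, the round loop breaks instead of re-prompting the model with tool results.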
@@ -2495,6 +2578,11 @@ class DuplexPipeline:
                await self._emit_tool_result(result, source="server")
                tool_results.append(result)
            if provider_resumed:
                continue
            if provider_managed_tool:
                break
            messages = [
                *messages,
                LLMMessage(