Add fastgpt as separate assistant mode

This commit is contained in:
Xin Wang
2026-03-11 08:37:34 +08:00
parent 13684d498b
commit f3612a710d
26 changed files with 2333 additions and 210 deletions


@@ -594,6 +594,7 @@ class DuplexPipeline:
                "provider": llm_provider,
                "model": str(self._runtime_llm.get("model") or settings.llm_model),
                "baseUrl": llm_base_url,
                "appId": str(self._runtime_llm.get("appId") or ""),
            },
            "asr": {
                "provider": asr_provider,
@@ -937,6 +938,19 @@ class DuplexPipeline:
            return None
        return text.strip().strip('"').strip("'")

    async def _resolve_provider_initial_greeting(self) -> Optional[str]:
        if not self.llm_service or not hasattr(self.llm_service, "get_initial_greeting"):
            return None
        try:
            greeting = await self.llm_service.get_initial_greeting()
        except Exception as exc:
            logger.warning("Failed to load provider initial greeting: {}", exc)
            return None
        text = str(greeting or "").strip()
        return text or None

    async def start(self) -> None:
        """Start the pipeline and connect services."""
        try:
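The pipeline only probes for get_initial_greeting via hasattr, so the provider side is free to implement it however it likes. A hedged sketch of a FastGPT-backed counterpart, assuming FastGPT exposes the app's configured welcome text through an init-style endpoint; the URL path and response shape below are assumptions, not the repository's actual service code:

    import aiohttp
    from typing import Optional

    class FastGPTService:
        def __init__(self, base_url: str, api_key: str, app_id: str) -> None:
            self._base_url = base_url.rstrip("/")
            self._api_key = api_key
            self._app_id = app_id

        async def get_initial_greeting(self) -> Optional[str]:
            # Hypothetical endpoint; adjust to the deployment's actual API.
            url = f"{self._base_url}/core/chat/init?appId={self._app_id}"
            headers = {"Authorization": f"Bearer {self._api_key}"}
            async with aiohttp.ClientSession() as session:
                async with session.get(url, headers=headers) as resp:
                    resp.raise_for_status()
                    data = await resp.json()
            # The welcomeText location is an assumption about the payload shape.
            return (data.get("data") or {}).get("app", {}).get("chatConfig", {}).get("welcomeText")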
@@ -956,6 +970,7 @@ class DuplexPipeline:
                model=str(llm_model),
                api_key=str(llm_api_key).strip() if llm_api_key else None,
                base_url=str(llm_base_url).strip() if llm_base_url else None,
                app_id=str(self._runtime_llm.get("appId")).strip() if self._runtime_llm.get("appId") else None,
                system_prompt=self.conversation.system_prompt,
                temperature=settings.llm_temperature,
                knowledge_config=self._resolved_knowledge_config(),
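The new app_id keyword only matters to the FastGPT backend; other providers can ignore it. A hedged sketch of how construction might branch on provider (the factory, the guard, and both class stubs are illustrative, not the repository's actual wiring):

    class FastGPTService:  # see the fuller sketch above
        def __init__(self, base_url: str, api_key: str, app_id: str) -> None: ...

    class OpenAICompatibleService:  # hypothetical default backend
        def __init__(self, **kwargs) -> None: ...

    def create_llm_service(provider: str, *, model: str, api_key: str | None = None,
                           base_url: str | None = None, app_id: str | None = None, **kwargs):
        # Hypothetical factory: FastGPT requires an app id; others ignore the keyword.
        if provider == "fastgpt":
            if not app_id:
                raise ValueError("fastgpt provider requires an appId")
            return FastGPTService(base_url=base_url or "", api_key=api_key or "", app_id=app_id)
        return OpenAICompatibleService(model=model, api_key=api_key, base_url=base_url, **kwargs)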
@@ -1096,7 +1111,11 @@ class DuplexPipeline:
        if not self._bot_starts_first():
            return
        if self._generated_opener_enabled() and self._resolved_tool_schemas():
        provider_greeting = await self._resolve_provider_initial_greeting()
        if provider_greeting:
            self.conversation.greeting = provider_greeting
        if not provider_greeting and self._generated_opener_enabled() and self._resolved_tool_schemas():
            # Run generated opener as a normal tool-capable assistant turn.
            # Use an empty user input so the opener can be driven by system prompt policy.
            if self._current_turn_task and not self._current_turn_task.done():
@@ -1107,13 +1126,13 @@ class DuplexPipeline:
            return
        manual_opener_execution: Dict[str, List[Dict[str, Any]]] = {"toolCalls": [], "toolResults": []}
        if not self._generated_opener_enabled() and self._resolved_manual_opener_tool_calls():
        if not provider_greeting and not self._generated_opener_enabled() and self._resolved_manual_opener_tool_calls():
            self._start_turn()
            self._start_response()
            manual_opener_execution = await self._execute_manual_opener_tool_calls()
        greeting_to_speak = self.conversation.greeting
        if self._generated_opener_enabled():
        if not provider_greeting and self._generated_opener_enabled():
            generated_greeting = await self._generate_runtime_greeting()
            if generated_greeting:
                greeting_to_speak = generated_greeting
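Taken together, the opener now resolves in a fixed order: a provider greeting (FastGPT's configured welcome text) wins outright and suppresses both the generated opener and the manual opener tool calls; otherwise the previous behavior applies. A condensed, illustrative restatement of that precedence:

    async def resolve_opener(pipeline) -> str:
        # Illustrative only; mirrors the precedence in the two hunks above.
        provider_greeting = await pipeline._resolve_provider_initial_greeting()
        if provider_greeting:
            return provider_greeting  # FastGPT welcome text short-circuits everything
        if pipeline._generated_opener_enabled():
            generated = await pipeline._generate_runtime_greeting()
            if generated:
                return generated
        return pipeline.conversation.greeting  # static fallback (after manual opener tools, if any)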
@@ -1954,12 +1973,35 @@ class DuplexPipeline:
        return bool(self._runtime_tool_wait_for_response.get(normalized, False))

    def _tool_executor(self, tool_call: Dict[str, Any]) -> str:
        explicit_executor = str(tool_call.get("executor") or "").strip().lower()
        if explicit_executor in {"client", "server"}:
            return explicit_executor
        name = self._tool_name(tool_call)
        if name and name in self._runtime_tool_executor:
            return self._runtime_tool_executor[name]
        # Default to server execution unless explicitly marked as client.
        return "server"

    def _tool_wait_for_response_for_call(self, tool_name: str, tool_call: Dict[str, Any]) -> bool:
        explicit_wait = tool_call.get("wait_for_response")
        if explicit_wait is None:
            explicit_wait = tool_call.get("waitForResponse")
        if isinstance(explicit_wait, bool):
            return explicit_wait
        return self._tool_wait_for_response(tool_name)

    def _tool_timeout_ms(self, tool_call: Dict[str, Any]) -> int:
        raw_timeout = tool_call.get("timeout_ms")
        if raw_timeout is None:
            raw_timeout = tool_call.get("timeoutMs")
        try:
            timeout_ms = int(raw_timeout)
        except (TypeError, ValueError):
            timeout_ms = 0
        if timeout_ms > 0:
            return timeout_ms
        return int(self._TOOL_WAIT_TIMEOUT_SECONDS * 1000)

    def _tool_arguments(self, tool_call: Dict[str, Any]) -> Dict[str, Any]:
        fn = tool_call.get("function")
        if not isinstance(fn, dict):
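These three helpers let each streamed tool call override the runtime defaults, checking the explicit per-call field first and falling back to the per-tool configuration. A minimal illustration of the precedence (all values made up):

    call = {
        "id": "call_1",
        "function": {"name": "open_form", "arguments": "{}"},
        "executor": "client",    # explicit executor beats the runtime executor map
        "waitForResponse": True, # per-call flag beats the per-tool default
        "timeoutMs": 15000,      # per-call timeout beats _TOOL_WAIT_TIMEOUT_SECONDS
    }
    # pipeline._tool_executor(call)                          -> "client"
    # pipeline._tool_wait_for_response_for_call("open_form", call) -> True
    # pipeline._tool_timeout_ms(call)                        -> 15000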
@@ -2179,7 +2221,7 @@ class DuplexPipeline:
            self._early_tool_results[call_id] = item
            self._completed_tool_call_ids.add(call_id)

    async def _wait_for_single_tool_result(self, call_id: str) -> Dict[str, Any]:
    async def _wait_for_single_tool_result(self, call_id: str, timeout_seconds: Optional[float] = None) -> Dict[str, Any]:
        if call_id in self._completed_tool_call_ids and call_id not in self._early_tool_results:
            return {
                "tool_call_id": call_id,
@@ -2193,8 +2235,9 @@ class DuplexPipeline:
        loop = asyncio.get_running_loop()
        future = loop.create_future()
        self._pending_tool_waiters[call_id] = future
        timeout = timeout_seconds if isinstance(timeout_seconds, (int, float)) and timeout_seconds > 0 else self._TOOL_WAIT_TIMEOUT_SECONDS
        try:
            return await asyncio.wait_for(future, timeout=self._TOOL_WAIT_TIMEOUT_SECONDS)
            return await asyncio.wait_for(future, timeout=timeout)
        except asyncio.TimeoutError:
            self._completed_tool_call_ids.add(call_id)
            return {
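The waiter is a plain asyncio future keyed by call id, now with a per-call timeout. A self-contained sketch of the same pattern, stripped of the pipeline's bookkeeping (the timeout result's status shape is an assumption):

    import asyncio
    from typing import Any, Dict, Optional

    class ToolWaiter:
        def __init__(self, default_timeout: float = 30.0) -> None:
            self._default_timeout = default_timeout
            self._pending: Dict[str, asyncio.Future] = {}

        async def wait(self, call_id: str, timeout_seconds: Optional[float] = None) -> Dict[str, Any]:
            future: asyncio.Future = asyncio.get_running_loop().create_future()
            self._pending[call_id] = future
            timeout = timeout_seconds if isinstance(timeout_seconds, (int, float)) and timeout_seconds > 0 else self._default_timeout
            try:
                return await asyncio.wait_for(future, timeout=timeout)
            except asyncio.TimeoutError:
                return {"tool_call_id": call_id, "status": {"code": 408}}  # assumed timeout shape
            finally:
                self._pending.pop(call_id, None)

        def resolve(self, call_id: str, result: Dict[str, Any]) -> None:
            # Called when the client posts its tool result back.
            future = self._pending.get(call_id)
            if future and not future.done():
                future.set_result(result)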
@@ -2256,6 +2299,7 @@ class DuplexPipeline:
        first_audio_sent = False
        self._pending_llm_delta = ""
        self._last_llm_delta_emit_ms = 0.0
        pending_provider_stream = None
        for _ in range(max_rounds):
            if self._interrupt_event.is_set():
                break
@@ -2267,7 +2311,10 @@ class DuplexPipeline:
            allow_text_output = True
            use_engine_sentence_split = self._use_engine_sentence_split_for_tts()
            async for raw_event in self.llm_service.generate_stream(messages):
            stream_iter = pending_provider_stream if pending_provider_stream is not None else self.llm_service.generate_stream(messages)
            pending_provider_stream = None
            async for raw_event in stream_iter:
                if self._interrupt_event.is_set():
                    break
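The key mechanic here: a pending resume stream, when set, replaces exactly one generate_stream(messages) call and is then cleared, so the FastGPT continuation is consumed on the very next round. An illustrative reduction of the loop (run_rounds and the print stand-in are for illustration only):

    from typing import Any, AsyncIterator, Dict, List, Optional

    async def run_rounds(llm, messages: List[Dict[str, Any]], max_rounds: int = 4) -> None:
        pending_provider_stream: Optional[AsyncIterator[Dict[str, Any]]] = None
        for _ in range(max_rounds):
            stream = pending_provider_stream if pending_provider_stream is not None else llm.generate_stream(messages)
            pending_provider_stream = None
            async for event in stream:
                print(event)  # stand-in for the pipeline's event handling
            # ...tool execution may set pending_provider_stream for the next round...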
@@ -2282,14 +2329,21 @@ class DuplexPipeline:
                    if not tool_call:
                        continue
                    allow_text_output = False
                    tool_name = self._tool_name(tool_call) or "unknown_tool"
                    executor = self._tool_executor(tool_call)
                    enriched_tool_call = dict(tool_call)
                    enriched_tool_call["executor"] = executor
                    tool_name = self._tool_name(enriched_tool_call) or "unknown_tool"
                    tool_id = self._tool_id_for_name(tool_name)
                    tool_display_name = self._tool_display_name(tool_name) or tool_name
                    wait_for_response = self._tool_wait_for_response(tool_name)
                    tool_display_name = str(
                        enriched_tool_call.get("displayName")
                        or enriched_tool_call.get("display_name")
                        or self._tool_display_name(tool_name)
                        or tool_name
                    ).strip()
                    wait_for_response = self._tool_wait_for_response_for_call(tool_name, enriched_tool_call)
                    enriched_tool_call["wait_for_response"] = wait_for_response
                    timeout_ms = self._tool_timeout_ms(enriched_tool_call)
                    enriched_tool_call["timeout_ms"] = timeout_ms
                    call_id = str(enriched_tool_call.get("id") or "").strip()
                    fn_payload = (
                        dict(enriched_tool_call.get("function"))
@@ -2298,6 +2352,15 @@ class DuplexPipeline:
                    )
                    raw_args = str(fn_payload.get("arguments") or "") if isinstance(fn_payload, dict) else ""
                    tool_arguments = self._tool_arguments(enriched_tool_call)
                    if tool_name == "fastgpt.interactive":
                        context_payload = (
                            dict(tool_arguments.get("context"))
                            if isinstance(tool_arguments.get("context"), dict)
                            else {}
                        )
                        context_payload.setdefault("turn_id", turn_id)
                        context_payload.setdefault("response_id", response_id)
                        tool_arguments["context"] = context_payload
                    merged_tool_arguments = self._apply_tool_default_args(tool_name, tool_arguments)
                    try:
                        merged_args_text = json.dumps(merged_tool_arguments, ensure_ascii=False)
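The special-cased fastgpt.interactive tool is how a FastGPT workflow's interactive node (for example a user-select or form-input step) surfaces to the client; stamping turn_id and response_id into the context lets the client's answer be correlated back to this exact turn. An illustrative arguments payload after injection (the type and params fields are assumptions about FastGPT's interactive schema, the ids are made up):

    tool_arguments = {
        "type": "userSelect",                  # assumed FastGPT interactive-node kind
        "params": {"options": ["Yes", "No"]},  # made-up options
        "context": {
            "turn_id": "turn-42",              # stamped by the pipeline when absent
            "response_id": "resp-7",
        },
    }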
@@ -2324,9 +2387,9 @@ class DuplexPipeline:
                            tool_id=tool_id,
                            tool_display_name=tool_display_name,
                            wait_for_response=wait_for_response,
                            arguments=tool_arguments,
                            arguments=merged_tool_arguments,
                            executor=executor,
                            timeout_ms=int(self._TOOL_WAIT_TIMEOUT_SECONDS * 1000),
                            timeout_ms=timeout_ms,
                            tool_call=enriched_tool_call,
                        )
                    },
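For client implementers: the emitted tool-call event now carries the effective executor and the per-call timeout alongside the merged (defaults-applied) arguments. An illustrative payload as the client might receive it; the field spelling is inferred from the keyword arguments above and the exact wire envelope is an assumption:

    tool_call_event = {
        "name": "open_form",                 # made-up tool
        "toolId": "tool-1",
        "toolDisplayName": "Open Form",
        "waitForResponse": True,
        "arguments": {"fields": ["email"]},  # defaults already merged in
        "executor": "client",
        "timeoutMs": 15000,                  # per-call override, else the pipeline default
    }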
@@ -2457,6 +2520,8 @@ class DuplexPipeline:
                break
            tool_results: List[Dict[str, Any]] = []
            provider_managed_tool = False
            provider_resumed = False
            for call in tool_calls:
                call_id = str(call.get("id") or "").strip()
                if not call_id:
@@ -2466,9 +2531,27 @@ class DuplexPipeline:
                tool_id = self._tool_id_for_name(tool_name)
                logger.info(f"[Tool] execute start name={tool_name} call_id={call_id} executor={executor}")
                if executor == "client":
                    result = await self._wait_for_single_tool_result(call_id)
                    timeout_ms = self._tool_timeout_ms(call)
                    result = await self._wait_for_single_tool_result(
                        call_id,
                        timeout_seconds=(timeout_ms / 1000.0),
                    )
                    await self._emit_tool_result(result, source="client")
                    tool_results.append(result)
                    if (
                        hasattr(self.llm_service, "handles_client_tool")
                        and hasattr(self.llm_service, "resume_after_client_tool_result")
                        and self.llm_service.handles_client_tool(tool_name)
                    ):
                        provider_managed_tool = True
                        status = result.get("status") if isinstance(result.get("status"), dict) else {}
                        status_code = int(status.get("code") or 0) if status else 0
                        output = result.get("output") if isinstance(result.get("output"), dict) else {}
                        action = str(output.get("action") or "").strip().lower()
                        if 200 <= status_code < 300 and action != "cancel":
                            pending_provider_stream = self.llm_service.resume_after_client_tool_result(call_id, result)
                            provider_resumed = True
                        break
                    continue
                call_for_executor = dict(call)
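The hasattr probes above define an informal, duck-typed provider protocol: a service that owns one of its client tools implements both methods, and resume_after_client_tool_result returns the continuation stream synchronously (note the call is not awaited). A sketch of a FastGPT-style implementation; the continuation mechanics and placeholder event are assumptions:

    from typing import Any, AsyncIterator, Dict

    class FastGPTChatService:
        """Sketch of the duck-typed protocol probed via hasattr() above."""

        def handles_client_tool(self, tool_name: str) -> bool:
            # Only FastGPT's own interactive-node tool is provider-managed.
            return tool_name == "fastgpt.interactive"

        def resume_after_client_tool_result(
            self, call_id: str, result: Dict[str, Any]
        ) -> AsyncIterator[Dict[str, Any]]:
            # Feed the user's interactive answer back into the FastGPT chat and
            # hand the pipeline the continuation stream; how the answer is
            # serialized to FastGPT is an assumption.
            output = result.get("output") if isinstance(result.get("output"), dict) else {}
            return self._continue_chat(call_id, output)

        async def _continue_chat(self, call_id: str, output: Dict[str, Any]):
            yield {"type": "text_delta", "text": ""}  # placeholder continuation event

A successful, non-cancelled result swaps in the resume stream (provider_resumed) for the next round; a failure or an explicit cancel leaves only provider_managed_tool set, so, as the final hunk shows, the round loop breaks instead of re-prompting the model with tool results.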
@@ -2495,6 +2578,11 @@ class DuplexPipeline:
                await self._emit_tool_result(result, source="server")
                tool_results.append(result)
            if provider_resumed:
                continue
            if provider_managed_tool:
                break
            messages = [
                *messages,
                LLMMessage(