Compare commits

...

2 Commits

9 changed files with 501 additions and 68 deletions

View File

@@ -98,6 +98,7 @@ class ToolResource(Base):
http_timeout_ms: Mapped[int] = mapped_column(Integer, default=10000)
parameter_schema: Mapped[dict] = mapped_column(JSON, default=dict)
parameter_defaults: Mapped[dict] = mapped_column(JSON, default=dict)
wait_for_response: Mapped[bool] = mapped_column(default=False)
enabled: Mapped[bool] = mapped_column(default=True)
is_system: Mapped[bool] = mapped_column(default=False)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)

View File

@@ -22,7 +22,13 @@ from ..schemas import (
AssistantOpenerAudioGenerateRequest,
AssistantOpenerAudioOut,
)
from .tools import TOOL_REGISTRY, TOOL_CATEGORY_MAP, TOOL_PARAMETER_DEFAULTS, _ensure_tool_resource_schema
from .tools import (
TOOL_REGISTRY,
TOOL_CATEGORY_MAP,
TOOL_PARAMETER_DEFAULTS,
TOOL_WAIT_FOR_RESPONSE_DEFAULTS,
_ensure_tool_resource_schema,
)
router = APIRouter(prefix="/assistants", tags=["Assistants"])
@@ -142,6 +148,11 @@ def _resolve_runtime_tools(db: Session, selected_tool_ids: List[str], warnings:
)
defaults_raw = resource.parameter_defaults if resource else TOOL_PARAMETER_DEFAULTS.get(tool_id)
defaults = dict(defaults_raw) if isinstance(defaults_raw, dict) else {}
wait_for_response = (
bool(resource.wait_for_response)
if resource
else bool(TOOL_WAIT_FOR_RESPONSE_DEFAULTS.get(tool_id, False))
)
if not resource and tool_id not in TOOL_REGISTRY:
warnings.append(f"Tool resource not found: {tool_id}")
@@ -160,6 +171,7 @@ def _resolve_runtime_tools(db: Session, selected_tool_ids: List[str], warnings:
},
"displayName": display_name or tool_id,
"toolId": tool_id,
"waitForResponse": wait_for_response,
}
if defaults:
runtime_tool["defaultArgs"] = defaults

View File

@@ -98,6 +98,17 @@ TOOL_REGISTRY = {
"required": ["msg"]
}
},
"text_msg_prompt": {
"name": "文本消息提示",
"description": "显示一条文本弹窗提示",
"parameters": {
"type": "object",
"properties": {
"msg": {"type": "string", "description": "提示文本内容"}
},
"required": ["msg"]
}
},
}
TOOL_CATEGORY_MAP = {
@@ -109,6 +120,7 @@ TOOL_CATEGORY_MAP = {
"increase_volume": "system",
"decrease_volume": "system",
"voice_message_prompt": "system",
"text_msg_prompt": "system",
}
TOOL_ICON_MAP = {
@@ -120,6 +132,7 @@ TOOL_ICON_MAP = {
"increase_volume": "Volume2",
"decrease_volume": "Volume2",
"voice_message_prompt": "Volume2",
"text_msg_prompt": "Terminal",
}
TOOL_HTTP_DEFAULTS = {
@@ -130,6 +143,10 @@ TOOL_PARAMETER_DEFAULTS = {
"decrease_volume": {"step": 1},
}
# Per-tool default for the wait_for_response flag; lookups against this map
# fall back to False for any tool id not listed here.
TOOL_WAIT_FOR_RESPONSE_DEFAULTS = {
"text_msg_prompt": True,
}
def _normalize_parameter_schema(value: Any, *, tool_id: Optional[str] = None) -> Dict[str, Any]:
if not isinstance(value, dict):
@@ -177,6 +194,9 @@ def _ensure_tool_resource_schema(db: Session) -> None:
if "parameter_defaults" not in columns:
db.execute(text("ALTER TABLE tool_resources ADD COLUMN parameter_defaults JSON"))
altered = True
if "wait_for_response" not in columns:
db.execute(text("ALTER TABLE tool_resources ADD COLUMN wait_for_response BOOLEAN DEFAULT 0"))
altered = True
if altered:
db.commit()
@@ -222,6 +242,7 @@ def _seed_default_tools_if_empty(db: Session) -> None:
http_timeout_ms=int(http_defaults.get("http_timeout_ms") or 10000),
parameter_schema=_normalize_parameter_schema(payload.get("parameters"), tool_id=tool_id),
parameter_defaults=_normalize_parameter_defaults(TOOL_PARAMETER_DEFAULTS.get(tool_id)),
wait_for_response=bool(TOOL_WAIT_FOR_RESPONSE_DEFAULTS.get(tool_id, False)),
enabled=True,
is_system=True,
))
@@ -311,6 +332,7 @@ def create_tool_resource(data: ToolResourceCreate, db: Session = Depends(get_db)
http_timeout_ms=max(1000, int(data.http_timeout_ms or 10000)),
parameter_schema=parameter_schema,
parameter_defaults=parameter_defaults,
wait_for_response=bool(data.wait_for_response) if data.category == "system" else False,
enabled=data.enabled,
is_system=False,
)
@@ -342,6 +364,8 @@ def update_tool_resource(id: str, data: ToolResourceUpdate, db: Session = Depend
update_data["parameter_schema"] = _normalize_parameter_schema(update_data.get("parameter_schema"), tool_id=id)
if "parameter_defaults" in update_data:
update_data["parameter_defaults"] = _normalize_parameter_defaults(update_data.get("parameter_defaults"))
if new_category != "system":
update_data["wait_for_response"] = False
for field, value in update_data.items():
setattr(item, field, value)

View File

@@ -241,6 +241,7 @@ class ToolResourceBase(BaseModel):
http_timeout_ms: int = 10000
parameter_schema: Dict[str, Any] = Field(default_factory=dict)
parameter_defaults: Dict[str, Any] = Field(default_factory=dict)
wait_for_response: bool = False
enabled: bool = True
@@ -259,6 +260,7 @@ class ToolResourceUpdate(BaseModel):
http_timeout_ms: Optional[int] = None
parameter_schema: Optional[Dict[str, Any]] = None
parameter_defaults: Optional[Dict[str, Any]] = None
wait_for_response: Optional[bool] = None
enabled: Optional[bool] = None

View File

@@ -157,6 +157,17 @@ class DuplexPipeline:
"required": ["msg"],
},
},
"text_msg_prompt": {
"name": "text_msg_prompt",
"description": "Show a text message prompt dialog on client side",
"parameters": {
"type": "object",
"properties": {
"msg": {"type": "string", "description": "Message text to display"},
},
"required": ["msg"],
},
},
}
_DEFAULT_CLIENT_EXECUTORS = frozenset({
"turn_on_camera",
@@ -164,6 +175,7 @@ class DuplexPipeline:
"increase_volume",
"decrease_volume",
"voice_message_prompt",
"text_msg_prompt",
})
def __init__(
@@ -302,6 +314,7 @@ class DuplexPipeline:
self._runtime_tool_default_args: Dict[str, Dict[str, Any]] = {}
self._runtime_tool_id_map: Dict[str, str] = {}
self._runtime_tool_display_names: Dict[str, str] = {}
self._runtime_tool_wait_for_response: Dict[str, bool] = {}
self._pending_tool_waiters: Dict[str, asyncio.Future] = {}
self._early_tool_results: Dict[str, Dict[str, Any]] = {}
self._completed_tool_call_ids: set[str] = set()
@@ -325,6 +338,7 @@ class DuplexPipeline:
self._runtime_tool_default_args = self._resolved_tool_default_args_map()
self._runtime_tool_id_map = self._resolved_tool_id_map()
self._runtime_tool_display_names = self._resolved_tool_display_name_map()
self._runtime_tool_wait_for_response = self._resolved_tool_wait_for_response_map()
self._initial_greeting_emitted = False
if self._server_tool_executor is None:
@@ -429,12 +443,14 @@ class DuplexPipeline:
self._runtime_tool_default_args = self._resolved_tool_default_args_map()
self._runtime_tool_id_map = self._resolved_tool_id_map()
self._runtime_tool_display_names = self._resolved_tool_display_name_map()
self._runtime_tool_wait_for_response = self._resolved_tool_wait_for_response_map()
elif "tools" in metadata:
self._runtime_tools = []
self._runtime_tool_executor = {}
self._runtime_tool_default_args = {}
self._runtime_tool_id_map = {}
self._runtime_tool_display_names = {}
self._runtime_tool_wait_for_response = {}
if self.llm_service and hasattr(self.llm_service, "set_knowledge_config"):
self.llm_service.set_knowledge_config(self._resolved_knowledge_config())
@@ -559,6 +575,10 @@ class DuplexPipeline:
data = event.get("data")
if not isinstance(data, dict):
data = {}
explicit_turn_id = str(event.get("turn_id") or "").strip() or None
explicit_utterance_id = str(event.get("utterance_id") or "").strip() or None
explicit_response_id = str(event.get("response_id") or "").strip() or None
explicit_tts_id = str(event.get("tts_id") or "").strip() or None
if self._current_turn_id:
data.setdefault("turn_id", self._current_turn_id)
if self._current_utterance_id:
@@ -567,9 +587,29 @@ class DuplexPipeline:
data.setdefault("response_id", self._current_response_id)
if self._current_tts_id:
data.setdefault("tts_id", self._current_tts_id)
if explicit_turn_id:
data["turn_id"] = explicit_turn_id
if explicit_utterance_id:
data["utterance_id"] = explicit_utterance_id
if explicit_response_id:
data["response_id"] = explicit_response_id
if explicit_tts_id:
data["tts_id"] = explicit_tts_id
for k, v in event.items():
if k in {"type", "timestamp", "sessionId", "seq", "source", "trackId", "data"}:
if k in {
"type",
"timestamp",
"sessionId",
"seq",
"source",
"trackId",
"data",
"turn_id",
"utterance_id",
"response_id",
"tts_id",
}:
continue
data.setdefault(k, v)
@@ -1027,25 +1067,50 @@ class DuplexPipeline:
priority=30,
)
async def _emit_llm_delta(self, text: str) -> None:
async def _emit_llm_delta(
self,
text: str,
*,
turn_id: Optional[str] = None,
utterance_id: Optional[str] = None,
response_id: Optional[str] = None,
) -> None:
event = {
**ev(
"assistant.response.delta",
trackId=self.track_audio_out,
text=text,
)
}
if turn_id:
event["turn_id"] = turn_id
if utterance_id:
event["utterance_id"] = utterance_id
if response_id:
event["response_id"] = response_id
await self._send_event(
{
**ev(
"assistant.response.delta",
trackId=self.track_audio_out,
text=text,
)
},
event,
priority=20,
)
async def _flush_pending_llm_delta(self) -> None:
async def _flush_pending_llm_delta(
self,
*,
turn_id: Optional[str] = None,
utterance_id: Optional[str] = None,
response_id: Optional[str] = None,
) -> None:
if not self._pending_llm_delta:
return
chunk = self._pending_llm_delta
self._pending_llm_delta = ""
self._last_llm_delta_emit_ms = time.monotonic() * 1000.0
await self._emit_llm_delta(chunk)
await self._emit_llm_delta(
chunk,
turn_id=turn_id,
utterance_id=utterance_id,
response_id=response_id,
)
async def _outbound_loop(self) -> None:
"""Single sender loop that enforces priority for interrupt events."""
@@ -1516,6 +1581,25 @@ class DuplexPipeline:
result[name] = dict(raw_defaults)
return result
def _resolved_tool_wait_for_response_map(self) -> Dict[str, bool]:
result: Dict[str, bool] = {}
for item in self._runtime_tools:
if not isinstance(item, dict):
continue
fn = item.get("function")
if isinstance(fn, dict) and fn.get("name"):
name = str(fn.get("name")).strip()
else:
name = str(item.get("name") or "").strip()
if not name:
continue
raw_wait = item.get("waitForResponse")
if raw_wait is None:
raw_wait = item.get("wait_for_response")
if isinstance(raw_wait, bool):
result[name] = raw_wait
return result
def _resolved_tool_id_map(self) -> Dict[str, str]:
result: Dict[str, str] = {}
for item in self._runtime_tools:
@@ -1586,6 +1670,9 @@ class DuplexPipeline:
def _tool_display_name(self, tool_name: str) -> str:
return str(self._runtime_tool_display_names.get(tool_name) or tool_name).strip()
def _tool_wait_for_response(self, tool_name: str) -> bool:
return bool(self._runtime_tool_wait_for_response.get(tool_name, False))
def _tool_executor(self, tool_call: Dict[str, Any]) -> str:
name = self._tool_name(tool_call)
if name and name in self._runtime_tool_executor:
@@ -1761,7 +1848,9 @@ class DuplexPipeline:
self._start_turn()
if not self._current_utterance_id:
self._finalize_utterance()
self._start_response()
turn_id = self._current_turn_id
utterance_id = self._current_utterance_id
response_id = self._start_response()
# Start latency tracking
self._turn_start_time = time.time()
self._first_audio_sent = False
@@ -1795,7 +1884,11 @@ class DuplexPipeline:
event = self._normalize_stream_event(raw_event)
if event.type == "tool_call":
await self._flush_pending_llm_delta()
await self._flush_pending_llm_delta(
turn_id=turn_id,
utterance_id=utterance_id,
response_id=response_id,
)
tool_call = event.tool_call if isinstance(event.tool_call, dict) else None
if not tool_call:
continue
@@ -1806,6 +1899,8 @@ class DuplexPipeline:
tool_name = self._tool_name(enriched_tool_call) or "unknown_tool"
tool_id = self._tool_id_for_name(tool_name)
tool_display_name = self._tool_display_name(tool_name) or tool_name
wait_for_response = self._tool_wait_for_response(tool_name)
enriched_tool_call["wait_for_response"] = wait_for_response
call_id = str(enriched_tool_call.get("id") or "").strip()
fn_payload = (
dict(enriched_tool_call.get("function"))
@@ -1839,6 +1934,7 @@ class DuplexPipeline:
tool_name=tool_name,
tool_id=tool_id,
tool_display_name=tool_display_name,
wait_for_response=wait_for_response,
arguments=tool_arguments,
executor=executor,
timeout_ms=int(self._TOOL_WAIT_TIMEOUT_SECONDS * 1000),
@@ -1869,7 +1965,11 @@ class DuplexPipeline:
self._last_llm_delta_emit_ms <= 0.0
or now_ms - self._last_llm_delta_emit_ms >= self._LLM_DELTA_THROTTLE_MS
):
await self._flush_pending_llm_delta()
await self._flush_pending_llm_delta(
turn_id=turn_id,
utterance_id=utterance_id,
response_id=response_id,
)
if use_engine_sentence_split:
while True:
@@ -1905,7 +2005,10 @@ class DuplexPipeline:
**ev(
"output.audio.start",
trackId=self.track_audio_out,
)
),
"turn_id": turn_id,
"utterance_id": utterance_id,
"response_id": response_id,
},
priority=30,
)
@@ -1915,13 +2018,20 @@ class DuplexPipeline:
sentence,
fade_in_ms=0,
fade_out_ms=8,
turn_id=turn_id,
utterance_id=utterance_id,
response_id=response_id,
)
if use_engine_sentence_split:
remaining_text = f"{pending_punctuation}{sentence_buffer}".strip()
else:
remaining_text = sentence_buffer.strip()
await self._flush_pending_llm_delta()
await self._flush_pending_llm_delta(
turn_id=turn_id,
utterance_id=utterance_id,
response_id=response_id,
)
if (
self._tts_output_enabled()
and remaining_text
@@ -1935,7 +2045,10 @@ class DuplexPipeline:
**ev(
"output.audio.start",
trackId=self.track_audio_out,
)
),
"turn_id": turn_id,
"utterance_id": utterance_id,
"response_id": response_id,
},
priority=30,
)
@@ -1944,6 +2057,9 @@ class DuplexPipeline:
remaining_text,
fade_in_ms=0,
fade_out_ms=8,
turn_id=turn_id,
utterance_id=utterance_id,
response_id=response_id,
)
if not tool_calls:
@@ -2007,14 +2123,21 @@ class DuplexPipeline:
]
if full_response and not self._interrupt_event.is_set():
await self._flush_pending_llm_delta()
await self._flush_pending_llm_delta(
turn_id=turn_id,
utterance_id=utterance_id,
response_id=response_id,
)
await self._send_event(
{
**ev(
"assistant.response.final",
trackId=self.track_audio_out,
text=full_response,
)
),
"turn_id": turn_id,
"utterance_id": utterance_id,
"response_id": response_id,
},
priority=20,
)
@@ -2026,7 +2149,10 @@ class DuplexPipeline:
**ev(
"output.audio.end",
trackId=self.track_audio_out,
)
),
"turn_id": turn_id,
"utterance_id": utterance_id,
"response_id": response_id,
}, priority=10)
# End assistant turn
@@ -2049,7 +2175,15 @@ class DuplexPipeline:
self._current_response_id = None
self._current_tts_id = None
async def _speak_sentence(self, text: str, fade_in_ms: int = 0, fade_out_ms: int = 8) -> None:
async def _speak_sentence(
self,
text: str,
fade_in_ms: int = 0,
fade_out_ms: int = 8,
turn_id: Optional[str] = None,
utterance_id: Optional[str] = None,
response_id: Optional[str] = None,
) -> None:
"""
Synthesize and send a single sentence.
@@ -2086,7 +2220,10 @@ class DuplexPipeline:
"metrics.ttfb",
trackId=self.track_audio_out,
latencyMs=round(ttfb_ms),
)
),
"turn_id": turn_id,
"utterance_id": utterance_id,
"response_id": response_id,
}, priority=25)
# Double-check interrupt right before sending audio
@@ -2233,6 +2370,9 @@ class DuplexPipeline:
self._is_bot_speaking = False
self._drop_outbound_audio = True
self._audio_out_frame_buffer = b""
interrupted_turn_id = self._current_turn_id
interrupted_utterance_id = self._current_utterance_id
interrupted_response_id = self._current_response_id
# Send interrupt event to client IMMEDIATELY
# This must happen BEFORE canceling services, so client knows to discard in-flight audio
@@ -2240,7 +2380,10 @@ class DuplexPipeline:
**ev(
"response.interrupted",
trackId=self.track_audio_out,
)
),
"turn_id": interrupted_turn_id,
"utterance_id": interrupted_utterance_id,
"response_id": interrupted_response_id,
}, priority=0)
# Cancel TTS

View File

@@ -1689,6 +1689,13 @@ const TOOL_PARAMETER_HINTS: Record<string, any> = {
},
required: ['msg'],
},
text_msg_prompt: {
type: 'object',
properties: {
msg: { type: 'string', description: 'Message text to display in debug drawer modal' },
},
required: ['msg'],
},
code_interpreter: {
type: 'object',
properties: {
@@ -1707,8 +1714,12 @@ const DEBUG_CLIENT_TOOLS = [
{ id: 'increase_volume', name: 'increase_volume', description: '调高音量' },
{ id: 'decrease_volume', name: 'decrease_volume', description: '调低音量' },
{ id: 'voice_message_prompt', name: 'voice_message_prompt', description: '语音消息提示' },
{ id: 'text_msg_prompt', name: 'text_msg_prompt', description: '文本消息提示' },
] as const;
const DEBUG_CLIENT_TOOL_ID_SET = new Set<string>(DEBUG_CLIENT_TOOLS.map((item) => item.id));
// Fallback waitForResponse values for the built-in debug client tools;
// tools not listed here are treated as not waiting.
const DEBUG_CLIENT_TOOL_WAIT_DEFAULTS: Record<string, boolean> = {
text_msg_prompt: true,
};
type DynamicVariableEntry = {
id: string;
@@ -1973,6 +1984,16 @@ export const DebugDrawer: React.FC<{
const [inputText, setInputText] = useState('');
const [isLoading, setIsLoading] = useState(false);
const [callStatus, setCallStatus] = useState<'idle' | 'calling' | 'active'>('idle');
const [textPromptDialog, setTextPromptDialog] = useState<{
open: boolean;
message: string;
pendingResult?: {
toolCallId: string;
toolName: string;
toolDisplayName: string;
waitForResponse: boolean;
};
}>({ open: false, message: '' });
const [textSessionStarted, setTextSessionStarted] = useState(false);
const [wsStatus, setWsStatus] = useState<'disconnected' | 'connecting' | 'ready' | 'error'>('disconnected');
const [wsError, setWsError] = useState('');
@@ -2033,6 +2054,8 @@ export const DebugDrawer: React.FC<{
const assistantDraftIndexRef = useRef<number | null>(null);
const assistantResponseIndexByIdRef = useRef<Map<string, number>>(new Map());
const pendingTtfbByResponseIdRef = useRef<Map<string, number>>(new Map());
const interruptedResponseIdsRef = useRef<Set<string>>(new Set());
const interruptedDropNoticeKeysRef = useRef<Set<string>>(new Set());
const audioCtxRef = useRef<AudioContext | null>(null);
const playbackTimeRef = useRef<number>(0);
const activeAudioSourcesRef = useRef<Set<AudioBufferSourceNode>>(new Set());
@@ -2078,6 +2101,9 @@ export const DebugDrawer: React.FC<{
return null;
}
const isClientTool = debugClientTool ? true : (item?.category || 'query') === 'system';
const waitForResponse = isClientTool
? (item?.waitForResponse ?? DEBUG_CLIENT_TOOL_WAIT_DEFAULTS[toolId] ?? false)
: false;
const parameterSchema = (item?.parameterSchema && typeof item.parameterSchema === 'object')
? item.parameterSchema
: getDefaultToolParameters(toolId);
@@ -2087,6 +2113,7 @@ export const DebugDrawer: React.FC<{
return {
type: 'function',
executor: isClientTool ? 'client' : 'server',
waitForResponse,
...(parameterDefaults && Object.keys(parameterDefaults).length > 0 ? { defaultArgs: parameterDefaults } : {}),
function: {
name: toolId,
@@ -2101,6 +2128,31 @@ export const DebugDrawer: React.FC<{
assistantDraftIndexRef.current = null;
assistantResponseIndexByIdRef.current.clear();
pendingTtfbByResponseIdRef.current.clear();
interruptedResponseIdsRef.current.clear();
interruptedDropNoticeKeysRef.current.clear();
};
// Pull a normalized response id out of a server event payload.
// Checks the nested `data` object first, then top-level snake_case,
// then top-level camelCase; returns undefined when no non-blank id exists.
const extractResponseId = (payload: any): string | undefined => {
  const candidate = payload?.data?.response_id ?? payload?.response_id ?? payload?.responseId;
  const normalized = String(candidate || '').trim();
  return normalized.length > 0 ? normalized : undefined;
};
// Record that a stale event from an interrupted response was dropped,
// appending a tool-role notice to the transcript at most once per
// responseId+kind pair.
const noteInterruptedDrop = (responseId: string, kind: 'ttfb' | 'delta' | 'final') => {
const key = `${responseId}:${kind}`;
// De-duplicate: repeated stale events for the same response/kind log only once.
if (interruptedDropNoticeKeysRef.current.has(key)) return;
interruptedDropNoticeKeysRef.current.add(key);
// Bound the de-dup set; Sets iterate in insertion order, so this evicts
// the oldest recorded key.
if (interruptedDropNoticeKeysRef.current.size > 256) {
const oldest = interruptedDropNoticeKeysRef.current.values().next().value as string | undefined;
if (oldest) interruptedDropNoticeKeysRef.current.delete(oldest);
}
setMessages((prev) => [
...prev,
{
role: 'tool',
text: `drop stale ${kind} from interrupted response ${responseId}`,
},
]);
};
// Initialize
@@ -2120,6 +2172,7 @@ export const DebugDrawer: React.FC<{
stopVoiceCapture();
stopMedia();
closeWs();
setTextPromptDialog({ open: false, message: '' });
if (audioCtxRef.current) {
void audioCtxRef.current.close();
audioCtxRef.current = null;
@@ -2346,6 +2399,64 @@ export const DebugDrawer: React.FC<{
clearPlaybackQueue();
};
// Send a client tool result back over the websocket (best-effort: skipped
// if the socket is not open) and append a human-readable summary of the
// result to the transcript.
const emitClientToolResult = (resultPayload: any, toolDisplayName?: string) => {
const ws = wsRef.current;
if (ws && ws.readyState === WebSocket.OPEN) {
ws.send(
JSON.stringify({
type: 'tool_call.results',
results: [resultPayload],
})
);
}
// Missing/zero status fields are rendered as a generic 500/error.
const statusCode = Number(resultPayload?.status?.code || 500);
const statusMessage = String(resultPayload?.status?.message || 'error');
const displayName = toolDisplayName || String(resultPayload?.name || 'unknown_tool');
// Numeric outputs on success get a compact "name = value" form; everything
// else shows the status code and message.
const resultText =
statusCode === 200 && typeof resultPayload?.output?.result === 'number'
? `result ${displayName} = ${resultPayload.output.result}`
: `result ${displayName} status=${statusCode} ${statusMessage}`;
setMessages((prev) => [
...prev,
{
role: 'tool',
text: resultText,
},
]);
};
// Close the text-prompt dialog and, if the originating tool call was
// waiting for a response, emit its result (including how it was closed).
// NOTE(review): this captures `pending`/`message` via side effects inside
// the setState updater and reads them immediately after — that assumes the
// updater runs synchronously before the `if` below, which React does not
// guarantee (and StrictMode may invoke the updater twice). Looks to work
// for an immediately-flushed update, but confirm / consider a ref instead.
const closeTextPromptDialog = (action: 'confirm' | 'dismiss') => {
let pending:
| {
toolCallId: string;
toolName: string;
toolDisplayName: string;
waitForResponse: boolean;
}
| undefined;
let message = '';
setTextPromptDialog((prev) => {
pending = prev.pendingResult;
message = prev.message;
return { open: false, message: '' };
});
if (pending?.waitForResponse) {
emitClientToolResult(
{
tool_call_id: pending.toolCallId,
name: pending.toolName,
output: {
message: 'text_prompt_closed',
action,
msg: message,
},
status: { code: 200, message: 'ok' },
},
pending.toolDisplayName
);
}
};
const scheduleQueuedPlayback = (ctx: AudioContext) => {
const queue = queuedAudioBuffersRef.current;
if (queue.length === 0) return;
@@ -2474,12 +2585,16 @@ export const DebugDrawer: React.FC<{
};
// Tear down the active debug call: dismiss any open text prompt first
// (so a waiting tool call still gets its result), then stop capture and
// media, close the websocket, and reset call/transcript/dialog state.
const handleHangup = () => {
if (textPromptDialog.open) {
closeTextPromptDialog('dismiss');
}
stopVoiceCapture();
stopMedia();
closeWs();
setCallStatus('idle');
clearResponseTracking();
setMessages([]);
setTextPromptDialog({ open: false, message: '' });
lastUserFinalRef.current = '';
setIsLoading(false);
};
@@ -2903,6 +3018,14 @@ export const DebugDrawer: React.FC<{
}
if (type === 'response.interrupted') {
const interruptedResponseId = extractResponseId(payload);
if (interruptedResponseId) {
interruptedResponseIdsRef.current.add(interruptedResponseId);
if (interruptedResponseIdsRef.current.size > 64) {
const oldest = interruptedResponseIdsRef.current.values().next().value as string | undefined;
if (oldest) interruptedResponseIdsRef.current.delete(oldest);
}
}
assistantDraftIndexRef.current = null;
setIsLoading(false);
stopPlaybackImmediately();
@@ -2913,8 +3036,11 @@ export const DebugDrawer: React.FC<{
const maybeTtfb = Number(payload?.latencyMs ?? payload?.data?.latencyMs);
if (!Number.isFinite(maybeTtfb) || maybeTtfb < 0) return;
const ttfbMs = Math.round(maybeTtfb);
const responseIdRaw = payload?.data?.response_id ?? payload?.response_id ?? payload?.responseId;
const responseId = String(responseIdRaw || '').trim();
const responseId = extractResponseId(payload);
if (responseId && interruptedResponseIdsRef.current.has(responseId)) {
noteInterruptedDrop(responseId, 'ttfb');
return;
}
if (responseId) {
const indexed = assistantResponseIndexByIdRef.current.get(responseId);
if (typeof indexed === 'number') {
@@ -2967,6 +3093,9 @@ export const DebugDrawer: React.FC<{
parsedArgs = {};
}
}
const waitForResponse = Boolean(
payload?.wait_for_response ?? toolCall?.wait_for_response ?? toolCall?.waitForResponse ?? false
);
const resultPayload: any = {
tool_call_id: toolCallId,
name: toolName,
@@ -2977,31 +3106,21 @@ export const DebugDrawer: React.FC<{
resultPayload.output = { message: `Client tool '${toolName}' is disabled in debug settings` };
resultPayload.status = { code: 503, message: 'tool_disabled' };
}
const sendToolResult = () => {
ws.send(
JSON.stringify({
type: 'tool_call.results',
results: [resultPayload],
})
);
const statusCode = Number(resultPayload?.status?.code || 500);
const statusMessage = String(resultPayload?.status?.message || 'error');
const resultText =
statusCode === 200 && typeof resultPayload?.output?.result === 'number'
? `result ${toolDisplayName} = ${resultPayload.output.result}`
: `result ${toolDisplayName} status=${statusCode} ${statusMessage}`;
setMessages((prev) => [
...prev,
{
role: 'tool',
text: resultText,
},
]);
};
try {
if (resultPayload.status.code === 503) {
// Keep disabled result as-is.
} else if (toolName === 'turn_on_camera') {
if (!waitForResponse) {
emitClientToolResult(
{
tool_call_id: toolCallId,
name: toolName,
output: { message: 'camera_on_dispatched' },
status: { code: 200, message: 'ok' },
},
toolDisplayName
);
}
navigator.mediaDevices
.getUserMedia({
video: selectedCamera ? { deviceId: { exact: selectedCamera } } : true,
@@ -3010,20 +3129,36 @@ export const DebugDrawer: React.FC<{
.then((stream) => {
if (videoRef.current) videoRef.current.srcObject = stream;
streamRef.current = stream;
resultPayload.output = {
message: 'camera_on',
tracks: stream.getVideoTracks().length,
};
resultPayload.status = { code: 200, message: 'ok' };
sendToolResult();
if (waitForResponse) {
emitClientToolResult(
{
tool_call_id: toolCallId,
name: toolName,
output: {
message: 'camera_on',
tracks: stream.getVideoTracks().length,
},
status: { code: 200, message: 'ok' },
},
toolDisplayName
);
}
})
.catch((err) => {
resultPayload.output = {
message: `Client tool '${toolName}' failed`,
error: err instanceof Error ? err.message : String(err),
};
resultPayload.status = { code: 500, message: 'client_tool_failed' };
sendToolResult();
if (waitForResponse) {
emitClientToolResult(
{
tool_call_id: toolCallId,
name: toolName,
output: {
message: `Client tool '${toolName}' failed`,
error: err instanceof Error ? err.message : String(err),
},
status: { code: 500, message: 'client_tool_failed' },
},
toolDisplayName
);
}
});
return;
} else if (toolName === 'turn_off_camera') {
@@ -3058,6 +3193,36 @@ export const DebugDrawer: React.FC<{
const utterance = new SpeechSynthesisUtterance(msg);
utterance.lang = 'zh-CN';
window.speechSynthesis.cancel();
if (waitForResponse) {
utterance.onend = () => {
emitClientToolResult(
{
tool_call_id: toolCallId,
name: toolName,
output: { message: 'voice_prompt_completed', msg },
status: { code: 200, message: 'ok' },
},
toolDisplayName
);
};
utterance.onerror = (event) => {
emitClientToolResult(
{
tool_call_id: toolCallId,
name: toolName,
output: {
message: 'voice_prompt_failed',
msg,
error: String(event.error || 'speech_error'),
},
status: { code: 500, message: 'client_tool_failed' },
},
toolDisplayName
);
};
window.speechSynthesis.speak(utterance);
return;
}
window.speechSynthesis.speak(utterance);
resultPayload.output = { message: 'voice_prompt_sent', msg };
resultPayload.status = { code: 200, message: 'ok' };
@@ -3065,6 +3230,29 @@ export const DebugDrawer: React.FC<{
resultPayload.output = { message: 'speech_synthesis_unavailable', msg };
resultPayload.status = { code: 503, message: 'speech_output_unavailable' };
}
} else if (toolName === 'text_msg_prompt') {
const msg = String(parsedArgs?.msg || '').trim();
if (!msg) {
resultPayload.output = { message: "Missing required argument 'msg'" };
resultPayload.status = { code: 422, message: 'invalid_arguments' };
} else {
setTextPromptDialog({
open: true,
message: msg,
pendingResult: {
toolCallId: toolCallId,
toolName,
toolDisplayName,
waitForResponse,
},
});
if (!waitForResponse) {
resultPayload.output = { message: 'text_prompt_shown', msg };
resultPayload.status = { code: 200, message: 'ok' };
} else {
return;
}
}
}
} catch (err) {
resultPayload.output = {
@@ -3073,7 +3261,7 @@ export const DebugDrawer: React.FC<{
};
resultPayload.status = { code: 500, message: 'client_tool_failed' };
}
sendToolResult();
emitClientToolResult(resultPayload, toolDisplayName);
}
return;
}
@@ -3183,8 +3371,11 @@ export const DebugDrawer: React.FC<{
if (type === 'assistant.response.delta') {
const delta = String(payload.text || '');
if (!delta) return;
const responseIdRaw = payload?.data?.response_id ?? payload?.response_id ?? payload?.responseId;
const responseId = String(responseIdRaw || '').trim() || undefined;
const responseId = extractResponseId(payload);
if (responseId && interruptedResponseIdsRef.current.has(responseId)) {
noteInterruptedDrop(responseId, 'delta');
return;
}
setMessages((prev) => {
let idx = assistantDraftIndexRef.current;
if (idx === null || !prev[idx] || prev[idx].role !== 'model') {
@@ -3250,8 +3441,11 @@ export const DebugDrawer: React.FC<{
if (type === 'assistant.response.final') {
const finalText = String(payload.text || '');
const responseIdRaw = payload?.data?.response_id ?? payload?.response_id ?? payload?.responseId;
const responseId = String(responseIdRaw || '').trim() || undefined;
const responseId = extractResponseId(payload);
if (responseId && interruptedResponseIdsRef.current.has(responseId)) {
noteInterruptedDrop(responseId, 'final');
return;
}
setMessages((prev) => {
let idx = assistantDraftIndexRef.current;
assistantDraftIndexRef.current = null;
@@ -3714,7 +3908,7 @@ export const DebugDrawer: React.FC<{
)}
</div>
<div className={mode === 'text' && textSessionStarted ? 'shrink-0 mt-3 px-1 mb-3' : mode === 'voice' ? 'shrink-0 space-y-3 mt-3 px-1 mb-3' : 'shrink-0 space-y-2 mt-2 px-1 mb-3'}>
<div className={mode === 'text' && textSessionStarted ? 'shrink-0 mt-3 px-1 mb-3' : mode === 'voice' ? 'shrink-0 space-y-3 mt-3 px-1 mb-3' : 'shrink-0 space-y-2 mt-2 px-1 mb-3'}>
{mode === 'voice' && (
<div className="w-full flex items-center gap-2 pb-1">
<span className="text-xs text-muted-foreground shrink-0"></span>
@@ -3768,11 +3962,34 @@ export const DebugDrawer: React.FC<{
>
<Send className="h-4 w-4" />
</Button>
</div>
</div>
</div>
{textPromptDialog.open && (
<div className="absolute inset-0 z-40 flex items-center justify-center bg-black/55 backdrop-blur-[1px]">
<div className="relative w-[92%] max-w-md rounded-xl border border-white/15 bg-card/95 p-4 shadow-2xl animate-in zoom-in-95 duration-200">
<button
type="button"
onClick={() => closeTextPromptDialog('dismiss')}
className="absolute right-3 top-3 rounded-sm opacity-70 hover:opacity-100 text-muted-foreground hover:text-foreground transition-opacity"
title="关闭"
>
<X className="h-4 w-4" />
</button>
<div className="mb-3 pr-6">
<div className="text-[10px] font-black tracking-[0.14em] uppercase text-amber-300"></div>
<p className="mt-2 text-sm leading-6 text-foreground whitespace-pre-wrap break-words">{textPromptDialog.message}</p>
</div>
<div className="flex justify-end">
<Button size="sm" onClick={() => closeTextPromptDialog('confirm')}>
</Button>
</div>
</div>
</div>
</div>
</div>
</Drawer>
)}
</div>
</Drawer>
{isOpen && (
<div className="fixed inset-y-0 z-[51] right-[min(100vw,32rem)]">
<button

View File

@@ -184,6 +184,7 @@ export const ToolLibraryPage: React.FC = () => {
const [toolCategory, setToolCategory] = useState<'system' | 'query'>('system');
const [toolIcon, setToolIcon] = useState('Wrench');
const [toolEnabled, setToolEnabled] = useState(true);
const [toolWaitForResponse, setToolWaitForResponse] = useState(false);
const [toolHttpMethod, setToolHttpMethod] = useState<'GET' | 'POST' | 'PUT' | 'PATCH' | 'DELETE'>('GET');
const [toolHttpUrl, setToolHttpUrl] = useState('');
const [toolHttpHeadersText, setToolHttpHeadersText] = useState('{}');
@@ -215,6 +216,7 @@ export const ToolLibraryPage: React.FC = () => {
setToolCategory('system');
setToolIcon('Wrench');
setToolEnabled(true);
setToolWaitForResponse(false);
setToolHttpMethod('GET');
setToolHttpUrl('');
setToolHttpHeadersText('{}');
@@ -231,6 +233,7 @@ export const ToolLibraryPage: React.FC = () => {
setToolCategory(tool.category);
setToolIcon(tool.icon || 'Wrench');
setToolEnabled(tool.enabled ?? true);
setToolWaitForResponse(Boolean(tool.waitForResponse));
setToolHttpMethod((tool.httpMethod || 'GET') as 'GET' | 'POST' | 'PUT' | 'PATCH' | 'DELETE');
setToolHttpUrl(tool.httpUrl || '');
setToolHttpHeadersText(JSON.stringify(tool.httpHeaders || {}, null, 2));
@@ -269,6 +272,15 @@ export const ToolLibraryPage: React.FC = () => {
<Badge variant="outline" className={`text-[10px] border-0 px-0 ${tool.category === 'system' ? 'text-primary' : 'text-blue-400'}`}>
{tool.category === 'system' ? 'SYSTEM' : 'QUERY'}
</Badge>
{tool.category === 'system' && (
<Badge
variant="outline"
className={`text-[10px] h-4 px-1.5 border ${tool.waitForResponse ? 'border-amber-400/40 text-amber-300' : 'border-white/20 text-muted-foreground'}`}
title={tool.waitForResponse ? 'wait for response: ON' : 'wait for response: OFF'}
>
{tool.waitForResponse ? 'WAIT' : 'NO-WAIT'}
</Badge>
)}
<span className="text-[10px] text-muted-foreground font-mono opacity-50 truncate">ID: {tool.id}</span>
</div>
<p className="text-xs text-muted-foreground line-clamp-2 leading-relaxed opacity-80">{tool.description}</p>
@@ -371,6 +383,7 @@ export const ToolLibraryPage: React.FC = () => {
httpTimeoutMs: toolHttpTimeoutMs,
parameterSchema: parsedParameterSchema,
parameterDefaults: parsedParameterDefaults,
waitForResponse: toolCategory === 'system' ? toolWaitForResponse : false,
enabled: toolEnabled,
});
setTools((prev) => prev.map((item) => (item.id === updated.id ? updated : item)));
@@ -387,6 +400,7 @@ export const ToolLibraryPage: React.FC = () => {
httpTimeoutMs: toolHttpTimeoutMs,
parameterSchema: parsedParameterSchema,
parameterDefaults: parsedParameterDefaults,
waitForResponse: toolCategory === 'system' ? toolWaitForResponse : false,
enabled: toolEnabled,
});
setTools((prev) => [created, ...prev]);
@@ -573,6 +587,22 @@ export const ToolLibraryPage: React.FC = () => {
/>
</div>
{toolCategory === 'system' && (
<div className="rounded-md border border-white/10 bg-black/20 p-3 space-y-1.5">
<label className="flex items-center gap-2 text-xs text-muted-foreground">
<input
type="checkbox"
checked={toolWaitForResponse}
onChange={(e) => setToolWaitForResponse(e.target.checked)}
/>
(wait for response)
</label>
<p className="text-[11px] text-muted-foreground">
</p>
</div>
)}
<div className="space-y-4 rounded-md border border-white/10 bg-white/5 p-3">
<div className="flex items-center justify-between">
<div className="text-[10px] font-black uppercase tracking-widest text-emerald-300">Tool Parameters</div>

View File

@@ -125,6 +125,7 @@ const mapTool = (raw: AnyRecord): Tool => ({
httpTimeoutMs: Number(readField(raw, ['httpTimeoutMs', 'http_timeout_ms'], 10000)),
parameterSchema: readField(raw, ['parameterSchema', 'parameter_schema'], {}),
parameterDefaults: readField(raw, ['parameterDefaults', 'parameter_defaults'], {}),
waitForResponse: Boolean(readField(raw, ['waitForResponse', 'wait_for_response'], false)),
isSystem: Boolean(readField(raw, ['isSystem', 'is_system'], false)),
enabled: Boolean(readField(raw, ['enabled'], true)),
isCustom: !Boolean(readField(raw, ['isSystem', 'is_system'], false)),
@@ -571,6 +572,7 @@ export const createTool = async (data: Partial<Tool>): Promise<Tool> => {
http_timeout_ms: data.httpTimeoutMs ?? 10000,
parameter_schema: data.parameterSchema || {},
parameter_defaults: data.parameterDefaults || {},
wait_for_response: data.waitForResponse ?? false,
enabled: data.enabled ?? true,
};
const response = await apiRequest<AnyRecord>('/tools/resources', { method: 'POST', body: payload });
@@ -589,6 +591,7 @@ export const updateTool = async (id: string, data: Partial<Tool>): Promise<Tool>
http_timeout_ms: data.httpTimeoutMs,
parameter_schema: data.parameterSchema,
parameter_defaults: data.parameterDefaults,
wait_for_response: data.waitForResponse,
enabled: data.enabled,
};
const response = await apiRequest<AnyRecord>(`/tools/resources/${id}`, { method: 'PUT', body: payload });

View File

@@ -199,6 +199,7 @@ export interface Tool {
httpTimeoutMs?: number;
parameterSchema?: Record<string, any>;
parameterDefaults?: Record<string, any>;
waitForResponse?: boolean;
isCustom?: boolean;
isSystem?: boolean;
enabled?: boolean;