Add voice_choice_prompt and text_choice_prompt tools to API and UI. Implement state management and parameter definitions for user selection prompts, enhancing user interaction and experience.

This commit is contained in:
Xin Wang
2026-03-02 00:49:31 +08:00
parent 3a5d27d6c3
commit 1561056a3d
3 changed files with 376 additions and 92 deletions

View File

@@ -109,6 +109,67 @@ TOOL_REGISTRY = {
"required": ["msg"]
}
},
"voice_choice_prompt": {
"name": "语音选项提示",
"description": "播报问题并展示可选项,等待用户选择后回传结果",
"parameters": {
"type": "object",
"properties": {
"question": {"type": "string", "description": "向用户展示的问题文本"},
"options": {
"type": "array",
"description": "可选项(字符串或含 id/label/value 的对象)",
"minItems": 2,
"items": {
"anyOf": [
{"type": "string"},
{
"type": "object",
"properties": {
"id": {"type": "string"},
"label": {"type": "string"},
"value": {"type": "string"}
},
"required": ["label"]
}
]
}
},
"voice_text": {"type": "string", "description": "可选,单独指定播报文本;为空则播报 question"}
},
"required": ["question", "options"]
}
},
"text_choice_prompt": {
"name": "文本选项提示",
"description": "显示文本选项弹窗并等待用户选择后回传结果",
"parameters": {
"type": "object",
"properties": {
"question": {"type": "string", "description": "向用户展示的问题文本"},
"options": {
"type": "array",
"description": "可选项(字符串或含 id/label/value 的对象)",
"minItems": 2,
"items": {
"anyOf": [
{"type": "string"},
{
"type": "object",
"properties": {
"id": {"type": "string"},
"label": {"type": "string"},
"value": {"type": "string"}
},
"required": ["label"]
}
]
}
}
},
"required": ["question", "options"]
}
},
}
TOOL_CATEGORY_MAP = {
@@ -121,6 +182,8 @@ TOOL_CATEGORY_MAP = {
"decrease_volume": "system",
"voice_message_prompt": "system",
"text_msg_prompt": "system",
"voice_choice_prompt": "system",
"text_choice_prompt": "system",
}
TOOL_ICON_MAP = {
@@ -133,6 +196,8 @@ TOOL_ICON_MAP = {
"decrease_volume": "Volume2",
"voice_message_prompt": "Volume2",
"text_msg_prompt": "Terminal",
"voice_choice_prompt": "Volume2",
"text_choice_prompt": "Terminal",
}
TOOL_HTTP_DEFAULTS = {
@@ -145,6 +210,8 @@ TOOL_PARAMETER_DEFAULTS = {
TOOL_WAIT_FOR_RESPONSE_DEFAULTS = {
"text_msg_prompt": True,
"voice_choice_prompt": True,
"text_choice_prompt": True,
}

View File

@@ -168,6 +168,70 @@ class DuplexPipeline:
"required": ["msg"],
},
},
"voice_choice_prompt": {
"name": "voice_choice_prompt",
"description": "Speak a question and show options on client side, then wait for selection",
"parameters": {
"type": "object",
"properties": {
"question": {"type": "string", "description": "Question text to show"},
"options": {
"type": "array",
"description": "Selectable options (string or object with id/label/value)",
"minItems": 2,
"items": {
"anyOf": [
{"type": "string"},
{
"type": "object",
"properties": {
"id": {"type": "string"},
"label": {"type": "string"},
"value": {"type": "string"},
},
"required": ["label"],
},
]
},
},
"voice_text": {
"type": "string",
"description": "Optional voice text. Falls back to question when omitted.",
},
},
"required": ["question", "options"],
},
},
"text_choice_prompt": {
"name": "text_choice_prompt",
"description": "Show a text-only choice prompt on client side and wait for selection",
"parameters": {
"type": "object",
"properties": {
"question": {"type": "string", "description": "Question text to show"},
"options": {
"type": "array",
"description": "Selectable options (string or object with id/label/value)",
"minItems": 2,
"items": {
"anyOf": [
{"type": "string"},
{
"type": "object",
"properties": {
"id": {"type": "string"},
"label": {"type": "string"},
"value": {"type": "string"},
},
"required": ["label"],
},
]
},
},
},
"required": ["question", "options"],
},
},
}
_DEFAULT_CLIENT_EXECUTORS = frozenset({
"turn_on_camera",
@@ -176,6 +240,8 @@ class DuplexPipeline:
"decrease_volume",
"voice_message_prompt",
"text_msg_prompt",
"voice_choice_prompt",
"text_choice_prompt",
})
def __init__(

View File

@@ -4,7 +4,7 @@ import { createPortal } from 'react-dom';
import { Plus, Search, Play, Square, Copy, Trash2, Mic, MessageSquare, Save, Video, PhoneOff, Camera, ArrowLeftRight, Send, Phone, Rocket, AlertTriangle, PhoneCall, CameraOff, Image, Images, CloudSun, Calendar, TrendingUp, Coins, Wrench, Globe, Terminal, X, ClipboardCheck, Sparkles, Volume2, Timer, ChevronDown, Database, Server, Zap, ExternalLink, Key, BrainCircuit, Ear, Book, Filter } from 'lucide-react';
import { Button, Input, Badge, Drawer, Dialog, Switch } from '../components/UI';
import { ASRModel, Assistant, KnowledgeBase, LLMModel, TabValue, Tool, Voice } from '../types';
import { createAssistant, deleteAssistant, fetchASRModels, fetchAssistantOpenerAudioPcmBuffer, fetchAssistants, fetchKnowledgeBases, fetchLLMModels, fetchTools, fetchVoices, generateAssistantOpenerAudio, updateAssistant as updateAssistantApi } from '../services/backendApi';
import { createAssistant, deleteAssistant, fetchASRModels, fetchAssistantOpenerAudioPcmBuffer, fetchAssistants, fetchKnowledgeBases, fetchLLMModels, fetchTools, fetchVoices, generateAssistantOpenerAudio, previewVoice, updateAssistant as updateAssistantApi } from '../services/backendApi';
const isOpenAICompatibleVendor = (vendor?: string) => {
const normalized = String(vendor || '').trim().toLowerCase();
@@ -1696,7 +1696,34 @@ const TOOL_PARAMETER_HINTS: Record<string, any> = {
},
required: ['msg'],
},
choice_prompt: {
voice_choice_prompt: {
type: 'object',
properties: {
question: { type: 'string', description: 'Question text to ask the user' },
options: {
type: 'array',
description: 'Selectable options (string or object with id/label/value)',
minItems: 2,
items: {
anyOf: [
{ type: 'string' },
{
type: 'object',
properties: {
id: { type: 'string' },
label: { type: 'string' },
value: { type: 'string' },
},
required: ['label'],
},
],
},
},
voice_text: { type: 'string', description: 'Optional custom voice text, defaults to question' },
},
required: ['question', 'options'],
},
text_choice_prompt: {
type: 'object',
properties: {
question: { type: 'string', description: 'Question text to ask the user' },
@@ -1741,12 +1768,14 @@ const DEBUG_CLIENT_TOOLS = [
{ id: 'decrease_volume', name: 'decrease_volume', description: '调低音量' },
{ id: 'voice_message_prompt', name: 'voice_message_prompt', description: '语音消息提示' },
{ id: 'text_msg_prompt', name: 'text_msg_prompt', description: '文本消息提示' },
{ id: 'choice_prompt', name: 'choice_prompt', description: '选项问题提示' },
{ id: 'voice_choice_prompt', name: 'voice_choice_prompt', description: '语音选项提示(原子)' },
{ id: 'text_choice_prompt', name: 'text_choice_prompt', description: '文本选项提示(等待选择)' },
] as const;
const DEBUG_CLIENT_TOOL_ID_SET = new Set<string>(DEBUG_CLIENT_TOOLS.map((item) => item.id));
const DEBUG_CLIENT_TOOL_WAIT_DEFAULTS: Record<string, boolean> = {
text_msg_prompt: true,
choice_prompt: true,
voice_choice_prompt: true,
text_choice_prompt: true,
};
type DynamicVariableEntry = {
@@ -1936,6 +1965,25 @@ type DebugChoicePromptOption = {
value: string;
};
/**
 * State backing the text prompt dialog of the debug drawer.
 * `pendingResult` carries the tool-call bookkeeping that the close handler
 * uses when it reports the outcome back for the originating tool call.
 */
type DebugTextPromptDialogState = {
open: boolean;
message: string;
pendingResult?: DebugPromptPendingResult;
};
/**
 * State backing the choice prompt dialog of the debug drawer.
 * - `requireSelection`: when true, the dialog hides its dismiss controls and
 *   the close handler ignores non-`select` actions unless they are forced.
 * - `voiceText`: text that is spoken when the dialog is activated; it also
 *   decides which header label the dialog shows.
 */
type DebugChoicePromptDialogState = {
open: boolean;
question: string;
options: DebugChoicePromptOption[];
pendingResult?: DebugPromptPendingResult;
requireSelection?: boolean;
voiceText?: string;
};
/**
 * One queued prompt waiting to be shown. `kind` discriminates which dialog
 * the payload belongs to; the payload is the dialog state without `open`,
 * because activation always opens the dialog.
 */
type DebugPromptQueueItem =
| { kind: 'text'; payload: Omit<DebugTextPromptDialogState, 'open'> }
| { kind: 'choice'; payload: Omit<DebugChoicePromptDialogState, 'open'> };
const normalizeChoicePromptOptions = (rawOptions: unknown[]): DebugChoicePromptOption[] => {
const usedIds = new Set<string>();
const resolved: DebugChoicePromptOption[] = [];
@@ -2055,19 +2103,12 @@ export const DebugDrawer: React.FC<{
const [inputText, setInputText] = useState('');
const [isLoading, setIsLoading] = useState(false);
const [callStatus, setCallStatus] = useState<'idle' | 'calling' | 'active'>('idle');
const [textPromptDialog, setTextPromptDialog] = useState<{
open: boolean;
message: string;
pendingResult?: DebugPromptPendingResult;
}>({ open: false, message: '' });
const [choicePromptDialog, setChoicePromptDialog] = useState<{
open: boolean;
question: string;
options: DebugChoicePromptOption[];
pendingResult?: DebugPromptPendingResult;
}>({ open: false, question: '', options: [] });
const [textPromptDialog, setTextPromptDialog] = useState<DebugTextPromptDialogState>({ open: false, message: '' });
const [choicePromptDialog, setChoicePromptDialog] = useState<DebugChoicePromptDialogState>({ open: false, question: '', options: [] });
const textPromptDialogRef = useRef(textPromptDialog);
const choicePromptDialogRef = useRef(choicePromptDialog);
const promptDialogQueueRef = useRef<DebugPromptQueueItem[]>([]);
const promptAudioRef = useRef<HTMLAudioElement | null>(null);
const [textSessionStarted, setTextSessionStarted] = useState(false);
const [wsStatus, setWsStatus] = useState<'disconnected' | 'connecting' | 'ready' | 'error'>('disconnected');
const [wsError, setWsError] = useState('');
@@ -2245,9 +2286,17 @@ export const DebugDrawer: React.FC<{
}
} else {
setMode('text');
if (textPromptDialogRef.current.open) {
closeTextPromptDialog('dismiss', { force: true, skipQueueAdvance: true });
}
if (choicePromptDialogRef.current.open) {
closeChoicePromptDialog('dismiss', undefined, { force: true, skipQueueAdvance: true });
}
stopVoiceCapture();
stopMedia();
closeWs();
stopPromptVoicePlayback();
promptDialogQueueRef.current = [];
setTextPromptDialog({ open: false, message: '' });
setChoicePromptDialog({ open: false, question: '', options: [] });
if (audioCtxRef.current) {
@@ -2514,8 +2563,102 @@ export const DebugDrawer: React.FC<{
]);
};
const closeTextPromptDialog = (action: 'confirm' | 'dismiss') => {
/**
 * Silences any prompt voice output that is currently playing: pauses and
 * forgets the tracked audio element, then cancels browser speech synthesis
 * when that API is present.
 */
const stopPromptVoicePlayback = () => {
  const activeAudio = promptAudioRef.current;
  if (activeAudio) {
    try {
      activeAudio.pause();
    } catch {
      // A failing pause() must not prevent the reference from being released.
    }
    promptAudioRef.current = null;
  }

  const speechAvailable = typeof window !== 'undefined' && 'speechSynthesis' in window;
  if (speechAvailable) {
    window.speechSynthesis.cancel();
  }
};
/**
 * Speaks `text` for a prompt. Prefers the assistant's configured voice (via
 * `previewVoice`) and falls back to browser speech synthesis when the
 * assistant voice is unavailable or fails.
 *
 * Cancellation: the audio element is registered in `promptAudioRef` BEFORE
 * the preview request is awaited, so it doubles as a request token. If
 * `stopPromptVoicePlayback()` clears the ref, or a newer call replaces it,
 * while the request or `play()` is in flight, this call ends silently: it
 * neither starts the stale clip nor falls back to speech synthesis.
 *
 * @param text Text to speak; blank input is ignored.
 */
const playPromptVoice = async (text: string) => {
  const phrase = String(text || '').trim();
  if (!phrase) return;
  stopPromptVoicePlayback();

  const canUseAssistantTts = assistant.voiceOutputEnabled !== false && Boolean(assistant.voice);
  const selectedVoice = canUseAssistantTts
    ? voices.find((item) => item.id === assistant.voice)
    : undefined;

  if (selectedVoice) {
    const audio = new Audio();
    promptAudioRef.current = audio;
    const isCurrent = () => promptAudioRef.current === audio;
    const release = () => {
      if (isCurrent()) {
        promptAudioRef.current = null;
      }
    };
    audio.onended = release;
    audio.onerror = release;
    try {
      const audioUrl = await previewVoice(selectedVoice.id, phrase, assistant.speed);
      // Stopped or superseded while the clip was being fetched.
      if (!isCurrent()) return;
      audio.src = audioUrl;
      await audio.play();
      return;
    } catch (err) {
      // pause() during a pending play() rejects the play promise; that is a
      // deliberate stop, not a TTS failure, so do not fall back to speech.
      if (!isCurrent()) return;
      promptAudioRef.current = null;
      console.warn('Assistant TTS preview failed, falling back to speechSynthesis', err);
    }
  }

  if (typeof window !== 'undefined' && 'speechSynthesis' in window) {
    const utterance = new SpeechSynthesisUtterance(phrase);
    utterance.lang = assistant.language === 'en' ? 'en-US' : 'zh-CN';
    window.speechSynthesis.cancel();
    window.speechSynthesis.speak(utterance);
  }
};
/** Reports whether either prompt dialog (text or choice) is currently marked open in its ref. */
const hasActivePromptDialog = () => {
  if (textPromptDialogRef.current.open) {
    return true;
  }
  return choicePromptDialogRef.current.open;
};
/**
 * Opens the dialog matching a queued prompt item. For choice prompts with a
 * non-blank `voiceText`, playback of that text is started as well
 * (fire-and-forget).
 *
 * The matching dialog ref is updated synchronously together with the state
 * setter. The queue gate (`hasActivePromptDialog`) reads the refs, so without
 * this a second prompt arriving before the next render would still see "no
 * dialog open" and overwrite the first one, dropping its pending result.
 * NOTE(review): assumes any render-time sync of these refs mirrors the same
 * state objects, which makes the eager write harmless - confirm.
 *
 * @param item Queued prompt to show.
 */
const activatePromptDialog = (item: DebugPromptQueueItem) => {
  if (item.kind === 'text') {
    const nextTextState = {
      open: true,
      message: item.payload.message,
      pendingResult: item.payload.pendingResult,
    };
    textPromptDialogRef.current = nextTextState;
    setTextPromptDialog(nextTextState);
    return;
  }

  const nextVoiceText = String(item.payload.voiceText || '').trim();
  const nextChoiceState = {
    open: true,
    question: item.payload.question,
    options: item.payload.options,
    pendingResult: item.payload.pendingResult,
    requireSelection: item.payload.requireSelection === true,
    // Blank voice text is normalised to undefined so the dialog treats it as text-only.
    voiceText: nextVoiceText || undefined,
  };
  choicePromptDialogRef.current = nextChoiceState;
  setChoicePromptDialog(nextChoiceState);
  if (nextVoiceText) {
    void playPromptVoice(nextVoiceText);
  }
};
/**
 * Shows a prompt right away when no dialog is open; otherwise appends it to
 * the pending queue so it can be shown later.
 */
const enqueuePromptDialog = (item: DebugPromptQueueItem) => {
  const dialogBusy = hasActivePromptDialog();
  if (!dialogBusy) {
    activatePromptDialog(item);
    return;
  }
  promptDialogQueueRef.current.push(item);
};
/**
 * Dequeues and shows the next pending prompt, if any.
 *
 * @param force When true, skips the "a dialog is already open" check.
 */
const openNextPromptDialog = (force = false) => {
  const blocked = !force && hasActivePromptDialog();
  if (blocked) return;

  const upcoming = promptDialogQueueRef.current.shift();
  if (upcoming) {
    activatePromptDialog(upcoming);
  }
};
const closeTextPromptDialog = (action: 'confirm' | 'dismiss', opts?: { force?: boolean; skipQueueAdvance?: boolean }) => {
const snapshot = textPromptDialogRef.current;
if (!snapshot.open && !opts?.force) return;
const pending = snapshot?.pendingResult;
const message = snapshot?.message || '';
setTextPromptDialog({ open: false, message: '' });
@@ -2534,16 +2677,25 @@ export const DebugDrawer: React.FC<{
pending.toolDisplayName
);
}
if (!opts?.skipQueueAdvance) {
openNextPromptDialog(true);
}
};
const closeChoicePromptDialog = (
action: 'select' | 'dismiss',
selectedOption?: DebugChoicePromptOption
selectedOption?: DebugChoicePromptOption,
opts?: { force?: boolean; skipQueueAdvance?: boolean }
) => {
const snapshot = choicePromptDialogRef.current;
if (!snapshot.open && !opts?.force) return;
if (snapshot.requireSelection && action !== 'select' && !opts?.force) {
return;
}
const pending = snapshot?.pendingResult;
const question = snapshot?.question || '';
const options = snapshot?.options || [];
stopPromptVoicePlayback();
setChoicePromptDialog({ open: false, question: '', options: [] });
if (pending?.waitForResponse) {
emitClientToolResult(
@@ -2568,6 +2720,9 @@ export const DebugDrawer: React.FC<{
pending.toolDisplayName
);
}
if (!opts?.skipQueueAdvance) {
openNextPromptDialog(true);
}
};
const scheduleQueuedPlayback = (ctx: AudioContext) => {
@@ -2699,11 +2854,13 @@ export const DebugDrawer: React.FC<{
const handleHangup = () => {
if (textPromptDialog.open) {
closeTextPromptDialog('dismiss');
closeTextPromptDialog('dismiss', { force: true, skipQueueAdvance: true });
}
if (choicePromptDialog.open) {
closeChoicePromptDialog('dismiss');
closeChoicePromptDialog('dismiss', undefined, { force: true, skipQueueAdvance: true });
}
stopPromptVoicePlayback();
promptDialogQueueRef.current = [];
stopVoiceCapture();
stopMedia();
closeWs();
@@ -3059,6 +3216,10 @@ export const DebugDrawer: React.FC<{
userDraftIndexRef.current = null;
lastUserFinalRef.current = '';
micFrameBufferRef.current = new Uint8Array(0);
stopPromptVoicePlayback();
promptDialogQueueRef.current = [];
setTextPromptDialog({ open: false, message: '' });
setChoicePromptDialog({ open: false, question: '', options: [] });
setTextSessionStarted(false);
stopPlaybackImmediately();
if (isOpen) setWsStatus('disconnected');
@@ -3220,9 +3381,11 @@ export const DebugDrawer: React.FC<{
parsedArgs = {};
}
}
const waitForResponse = Boolean(
const waitForResponseRaw = Boolean(
payload?.wait_for_response ?? toolCall?.wait_for_response ?? toolCall?.waitForResponse ?? false
);
const waitForResponse =
toolName === 'voice_choice_prompt' || toolName === 'text_choice_prompt' ? true : waitForResponseRaw;
const resultPayload: any = {
tool_call_id: toolCallId,
name: toolName,
@@ -3316,46 +3479,14 @@ export const DebugDrawer: React.FC<{
if (!msg) {
resultPayload.output = { message: "Missing required argument 'msg'" };
resultPayload.status = { code: 422, message: 'invalid_arguments' };
} else if (typeof window !== 'undefined' && 'speechSynthesis' in window) {
const utterance = new SpeechSynthesisUtterance(msg);
utterance.lang = 'zh-CN';
window.speechSynthesis.cancel();
} else {
void playPromptVoice(msg);
if (waitForResponse) {
utterance.onend = () => {
emitClientToolResult(
{
tool_call_id: toolCallId,
name: toolName,
output: { message: 'voice_prompt_completed', msg },
status: { code: 200, message: 'ok' },
},
toolDisplayName
);
};
utterance.onerror = (event) => {
emitClientToolResult(
{
tool_call_id: toolCallId,
name: toolName,
output: {
message: 'voice_prompt_failed',
msg,
error: String(event.error || 'speech_error'),
},
status: { code: 500, message: 'client_tool_failed' },
},
toolDisplayName
);
};
window.speechSynthesis.speak(utterance);
return;
// Voice prompt playback is fire-and-forget; keep previous wait behavior stable.
// Client ack is returned immediately after dispatch.
}
window.speechSynthesis.speak(utterance);
resultPayload.output = { message: 'voice_prompt_sent', msg };
resultPayload.status = { code: 200, message: 'ok' };
} else {
resultPayload.output = { message: 'speech_synthesis_unavailable', msg };
resultPayload.status = { code: 503, message: 'speech_output_unavailable' };
}
} else if (toolName === 'text_msg_prompt') {
const msg = String(parsedArgs?.msg || '').trim();
@@ -3363,15 +3494,16 @@ export const DebugDrawer: React.FC<{
resultPayload.output = { message: "Missing required argument 'msg'" };
resultPayload.status = { code: 422, message: 'invalid_arguments' };
} else {
setChoicePromptDialog({ open: false, question: '', options: [] });
setTextPromptDialog({
open: true,
message: msg,
pendingResult: {
toolCallId: toolCallId,
toolName,
toolDisplayName,
waitForResponse,
enqueuePromptDialog({
kind: 'text',
payload: {
message: msg,
pendingResult: {
toolCallId: toolCallId,
toolName,
toolDisplayName,
waitForResponse,
},
},
});
if (!waitForResponse) {
@@ -3381,10 +3513,15 @@ export const DebugDrawer: React.FC<{
return;
}
}
} else if (toolName === 'choice_prompt') {
} else if (toolName === 'text_choice_prompt' || toolName === 'voice_choice_prompt') {
const question = String(parsedArgs?.question || '').trim();
const rawOptions = Array.isArray(parsedArgs?.options) ? parsedArgs.options : [];
const options = normalizeChoicePromptOptions(rawOptions);
const isVoiceChoicePrompt = toolName === 'voice_choice_prompt';
const voiceText = isVoiceChoicePrompt
? String(parsedArgs?.voice_text || parsedArgs?.voiceText || parsedArgs?.msg || question || '').trim()
: '';
const requireSelection = toolName === 'voice_choice_prompt' || toolName === 'text_choice_prompt';
if (!question) {
resultPayload.output = { message: "Missing required argument 'question'" };
resultPayload.status = { code: 422, message: 'invalid_arguments' };
@@ -3392,21 +3529,24 @@ export const DebugDrawer: React.FC<{
resultPayload.output = { message: "Argument 'options' requires at least 2 valid entries" };
resultPayload.status = { code: 422, message: 'invalid_arguments' };
} else {
setTextPromptDialog({ open: false, message: '' });
setChoicePromptDialog({
open: true,
question,
options,
pendingResult: {
toolCallId: toolCallId,
toolName,
toolDisplayName,
waitForResponse,
enqueuePromptDialog({
kind: 'choice',
payload: {
question,
options,
pendingResult: {
toolCallId: toolCallId,
toolName,
toolDisplayName,
waitForResponse,
},
requireSelection,
voiceText,
},
});
if (!waitForResponse) {
if (!waitForResponse && !requireSelection) {
resultPayload.output = {
message: 'choice_prompt_shown',
message: `${toolName}_shown`,
question,
options,
};
@@ -4149,17 +4289,26 @@ export const DebugDrawer: React.FC<{
{choicePromptDialog.open && (
<div className="absolute inset-0 z-40 flex items-center justify-center bg-black/55 backdrop-blur-[1px]">
<div className="relative w-[92%] max-w-md rounded-xl border border-white/15 bg-card/95 p-4 shadow-2xl animate-in zoom-in-95 duration-200">
<button
type="button"
onClick={() => closeChoicePromptDialog('dismiss')}
className="absolute right-3 top-3 rounded-sm opacity-70 hover:opacity-100 text-muted-foreground hover:text-foreground transition-opacity"
title="关闭"
>
<X className="h-4 w-4" />
</button>
{!choicePromptDialog.requireSelection && (
<button
type="button"
onClick={() => closeChoicePromptDialog('dismiss')}
className="absolute right-3 top-3 rounded-sm opacity-70 hover:opacity-100 text-muted-foreground hover:text-foreground transition-opacity"
title="关闭"
>
<X className="h-4 w-4" />
</button>
)}
<div className="mb-3 pr-6">
<div className="text-[10px] font-black tracking-[0.14em] uppercase text-cyan-300"></div>
<div className="text-[10px] font-black tracking-[0.14em] uppercase text-cyan-300">
{choicePromptDialog.requireSelection
? (choicePromptDialog.voiceText ? '语音选项提示' : '文本选项提示')
: '选项问题提示'}
</div>
<p className="mt-2 text-sm leading-6 text-foreground whitespace-pre-wrap break-words">{choicePromptDialog.question}</p>
{choicePromptDialog.requireSelection && (
<p className="mt-1 text-[11px] text-cyan-200/80"></p>
)}
</div>
<div className="space-y-2">
{choicePromptDialog.options.map((option) => (
@@ -4173,11 +4322,13 @@ export const DebugDrawer: React.FC<{
</Button>
))}
</div>
<div className="mt-3 flex justify-end">
<Button size="sm" variant="ghost" onClick={() => closeChoicePromptDialog('dismiss')}>
</Button>
</div>
{!choicePromptDialog.requireSelection && (
<div className="mt-3 flex justify-end">
<Button size="sm" variant="ghost" onClick={() => closeChoicePromptDialog('dismiss')}>
</Button>
</div>
)}
</div>
</div>
)}