Enhance WebSocket session management by requiring assistant_id as a query parameter for connection. Update API reference documentation to reflect changes in message flow and metadata validation rules, including the introduction of whitelists for allowed metadata fields and restrictions on sensitive keys. Refactor client examples to align with the new session initiation process.
This commit is contained in:
@@ -2842,6 +2842,61 @@ export const DebugDrawer: React.FC<{
|
||||
return error;
|
||||
};
|
||||
|
||||
/**
 * Keys that are allowed to survive under `metadata.overrides` when session
 * metadata is sanitized for the WebSocket. `sanitizeMetadataForWs` drops any
 * override entry whose key is not a member of this set.
 */
const METADATA_OVERRIDE_WHITELIST = new Set<string>([
  // First-turn / opener behaviour
  'firstTurnMode',
  'greeting',
  'generatedOpenerEnabled',
  // Prompt and output settings
  'systemPrompt',
  'output',
  'bargeIn',
  // Knowledge base selection
  'knowledge',
  'knowledgeBaseId',
  // Opener audio and tool schemas
  'openerAudio',
  'tools',
]);
|
||||
/**
 * Lower-case fragments that mark a metadata key as secret-bearing.
 * `isForbiddenSecretKey` does a substring match against these after
 * lower-casing the key and removing `_` and `-`.
 */
const METADATA_FORBIDDEN_SECRET_TOKENS: string[] = [
  'apikey',
  'token',
  'secret',
  'password',
  'authorization',
];
|
||||
/**
 * Type guard for "object-like, but not an array".
 *
 * Returns true for any truthy value whose `typeof` is `'object'` and which is
 * not an array. Note that this includes class instances (e.g. `Date`), not
 * only object literals; `null`, primitives and functions yield false.
 */
const isPlainObject = (value: unknown): value is Record<string, any> => {
  if (!value) return false;
  if (Array.isArray(value)) return false;
  return typeof value === 'object';
};
|
||||
/**
 * Reports whether a metadata key looks like it carries a secret.
 *
 * The key is lower-cased and stripped of `_` and `-` so that variants such as
 * `api_key`, `API-KEY` and `apiKey` compare equal, then checked for any of the
 * fragments in `METADATA_FORBIDDEN_SECRET_TOKENS` as a substring.
 *
 * @param key Property name to classify.
 * @returns true when the normalised key contains a forbidden fragment.
 */
const isForbiddenSecretKey = (key: string): boolean => {
  const compactKey = key.toLowerCase().replace(/[_-]/g, '');
  for (const fragment of METADATA_FORBIDDEN_SECRET_TOKENS) {
    if (compactKey.includes(fragment)) {
      return true;
    }
  }
  return false;
};
|
||||
/**
 * Recursively copies a value while omitting every property whose name is
 * classified as secret by `isForbiddenSecretKey`.
 *
 * - Arrays are mapped element by element.
 * - Values accepted by `isPlainObject` are rebuilt from their own enumerable
 *   entries, skipping forbidden keys and recursing into the rest.
 * - Anything else (primitives, null, functions) is returned unchanged.
 *
 * @param value Arbitrary value to clean.
 * @returns A cleaned copy for arrays/objects, or the input itself otherwise.
 */
const stripForbiddenSecretKeysDeep = (value: any): any => {
  if (Array.isArray(value)) {
    return value.map(stripForbiddenSecretKeysDeep);
  }
  if (!isPlainObject(value)) {
    return value;
  }

  const cleaned: Record<string, any> = {};
  for (const [name, child] of Object.entries(value)) {
    if (isForbiddenSecretKey(name)) continue;
    cleaned[name] = stripForbiddenSecretKeysDeep(child);
  }
  return cleaned;
};
|
||||
/**
 * Reduces arbitrary session metadata to the subset that is sent over the
 * WebSocket.
 *
 * The result always contains an `overrides` object. From `raw` only these
 * fields are carried over:
 * - `channel`, `source`: non-blank strings, trimmed.
 * - `history`: only its `userId`, and only when that is not `undefined`.
 * - `dynamicVariables`: passed through as-is when it is an object.
 * - `overrides`: entries whose key is in `METADATA_OVERRIDE_WHITELIST` and is
 *   not a secret-like key; their values are deep-stripped of secret-like keys.
 *
 * @param raw Untrusted metadata candidate.
 * @returns Sanitized metadata; `{ overrides: {} }` when `raw` is not an object.
 */
const sanitizeMetadataForWs = (raw: unknown): Record<string, any> => {
  const sanitized: Record<string, any> = { overrides: {} };
  if (!isPlainObject(raw)) {
    return sanitized;
  }

  const channel = typeof raw.channel === 'string' ? raw.channel.trim() : '';
  if (channel) {
    sanitized.channel = channel;
  }

  const source = typeof raw.source === 'string' ? raw.source.trim() : '';
  if (source) {
    sanitized.source = source;
  }

  // Only the user id is forwarded from history; other history fields are dropped.
  if (isPlainObject(raw.history) && raw.history.userId !== undefined) {
    sanitized.history = { userId: raw.history.userId };
  }

  if (isPlainObject(raw.dynamicVariables)) {
    sanitized.dynamicVariables = raw.dynamicVariables;
  }

  if (isPlainObject(raw.overrides)) {
    const allowedOverrides: Record<string, any> = {};
    for (const [name, candidate] of Object.entries(raw.overrides)) {
      const permitted = METADATA_OVERRIDE_WHITELIST.has(name) && !isForbiddenSecretKey(name);
      if (permitted) {
        allowedOverrides[name] = stripForbiddenSecretKeysDeep(candidate);
      }
    }
    sanitized.overrides = allowedOverrides;
  }

  return sanitized;
};
|
||||
|
||||
const buildDynamicVariablesPayload = (): { variables: Record<string, string>; error?: string } => {
|
||||
const variables: Record<string, string> = {};
|
||||
const nonEmptyRows = dynamicVariables
|
||||
@@ -2908,104 +2963,35 @@ export const DebugDrawer: React.FC<{
|
||||
|
||||
const buildLocalResolvedRuntime = () => {
|
||||
const warnings: string[] = [];
|
||||
const services: Record<string, any> = {};
|
||||
const ttsEnabled = Boolean(textTtsEnabled);
|
||||
const isExternalLlm = assistant.configMode === 'dify' || assistant.configMode === 'fastgpt';
|
||||
const knowledgeBaseId = String(assistant.knowledgeBaseId || '').trim();
|
||||
const knowledge = knowledgeBaseId
|
||||
? { enabled: true, kbId: knowledgeBaseId, nResults: 5 }
|
||||
: { enabled: false };
|
||||
|
||||
if (isExternalLlm) {
|
||||
services.llm = {
|
||||
provider: 'openai',
|
||||
model: '',
|
||||
apiKey: assistant.apiKey || '',
|
||||
baseUrl: assistant.apiUrl || '',
|
||||
};
|
||||
if (!assistant.apiUrl) warnings.push(`External LLM API URL is empty for mode: ${assistant.configMode}`);
|
||||
if (!assistant.apiKey) warnings.push(`External LLM API key is empty for mode: ${assistant.configMode}`);
|
||||
} else if (assistant.llmModelId) {
|
||||
const llm = llmModels.find((item) => item.id === assistant.llmModelId);
|
||||
if (llm) {
|
||||
services.llm = {
|
||||
provider: 'openai',
|
||||
model: llm.modelName || llm.name,
|
||||
apiKey: llm.apiKey,
|
||||
baseUrl: llm.baseUrl,
|
||||
};
|
||||
} else {
|
||||
warnings.push(`LLM model not found in loaded list: ${assistant.llmModelId}`);
|
||||
}
|
||||
} else {
|
||||
// Keep empty object to indicate engine should use default provider model.
|
||||
services.llm = {};
|
||||
}
|
||||
|
||||
if (assistant.asrModelId) {
|
||||
const asr = asrModels.find((item) => item.id === assistant.asrModelId);
|
||||
if (asr) {
|
||||
const asrProvider = isOpenAICompatibleVendor(asr.vendor) ? 'openai_compatible' : 'buffered';
|
||||
services.asr = {
|
||||
provider: asrProvider,
|
||||
model: asr.modelName || asr.name,
|
||||
apiKey: asrProvider === 'openai_compatible' ? asr.apiKey : null,
|
||||
};
|
||||
} else {
|
||||
warnings.push(`ASR model not found in loaded list: ${assistant.asrModelId}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (assistant.voice) {
|
||||
const voice = voices.find((item) => item.id === assistant.voice);
|
||||
if (voice) {
|
||||
const ttsProvider = isOpenAICompatibleVendor(voice.vendor) ? 'openai_compatible' : 'edge';
|
||||
services.tts = {
|
||||
enabled: ttsEnabled,
|
||||
provider: ttsProvider,
|
||||
model: voice.model,
|
||||
apiKey: ttsProvider === 'openai_compatible' ? voice.apiKey : null,
|
||||
voice: resolveRuntimeTtsVoice(assistant.voice, voice),
|
||||
speed: assistant.speed || voice.speed || 1.0,
|
||||
};
|
||||
} else {
|
||||
services.tts = {
|
||||
enabled: ttsEnabled,
|
||||
voice: assistant.voice,
|
||||
speed: assistant.speed || 1.0,
|
||||
};
|
||||
warnings.push(`Voice resource not found in loaded list: ${assistant.voice}`);
|
||||
}
|
||||
} else if (!ttsEnabled) {
|
||||
services.tts = {
|
||||
enabled: false,
|
||||
};
|
||||
}
|
||||
|
||||
const localResolved = {
|
||||
assistantId: assistant.id,
|
||||
warnings,
|
||||
sessionStartMetadata: {
|
||||
output: {
|
||||
mode: ttsEnabled ? 'audio' : 'text',
|
||||
overrides: {
|
||||
output: {
|
||||
mode: ttsEnabled ? 'audio' : 'text',
|
||||
},
|
||||
systemPrompt: assistant.prompt || '',
|
||||
firstTurnMode: assistant.firstTurnMode || 'bot_first',
|
||||
greeting: assistant.opener || '',
|
||||
generatedOpenerEnabled: assistant.generatedOpenerEnabled === true,
|
||||
bargeIn: {
|
||||
enabled: assistant.botCannotBeInterrupted !== true,
|
||||
minDurationMs: Math.max(0, Number(assistant.interruptionSensitivity ?? 180)),
|
||||
},
|
||||
knowledgeBaseId,
|
||||
knowledge,
|
||||
tools: selectedToolSchemas,
|
||||
},
|
||||
systemPrompt: assistant.prompt || '',
|
||||
firstTurnMode: assistant.firstTurnMode || 'bot_first',
|
||||
greeting: assistant.opener || '',
|
||||
generatedOpenerEnabled: assistant.generatedOpenerEnabled === true,
|
||||
bargeIn: {
|
||||
enabled: assistant.botCannotBeInterrupted !== true,
|
||||
minDurationMs: Math.max(0, Number(assistant.interruptionSensitivity ?? 180)),
|
||||
},
|
||||
knowledgeBaseId,
|
||||
knowledge,
|
||||
tools: selectedToolSchemas,
|
||||
services,
|
||||
history: {
|
||||
assistantId: assistant.id,
|
||||
userId: 1,
|
||||
source: 'debug',
|
||||
},
|
||||
source: 'web_debug',
|
||||
},
|
||||
};
|
||||
|
||||
@@ -3020,21 +3006,13 @@ export const DebugDrawer: React.FC<{
|
||||
}
|
||||
setDynamicVariablesError('');
|
||||
const localResolved = buildLocalResolvedRuntime();
|
||||
const mergedMetadata: Record<string, any> = {
|
||||
const mergedMetadata: Record<string, any> = sanitizeMetadataForWs({
|
||||
...localResolved.sessionStartMetadata,
|
||||
...(sessionMetadataExtras || {}),
|
||||
};
|
||||
});
|
||||
if (Object.keys(dynamicVariablesResult.variables).length > 0) {
|
||||
mergedMetadata.dynamicVariables = dynamicVariablesResult.variables;
|
||||
}
|
||||
// Engine resolves trusted runtime config by top-level assistant/app ID.
|
||||
// Keep these IDs at metadata root so backend /assistants/{id}/config is reachable.
|
||||
if (!mergedMetadata.assistantId && assistant.id) {
|
||||
mergedMetadata.assistantId = assistant.id;
|
||||
}
|
||||
if (!mergedMetadata.appId && assistant.id) {
|
||||
mergedMetadata.appId = assistant.id;
|
||||
}
|
||||
if (!mergedMetadata.channel) {
|
||||
mergedMetadata.channel = 'web_debug';
|
||||
}
|
||||
@@ -3069,6 +3047,24 @@ export const DebugDrawer: React.FC<{
|
||||
if (isOpen) setWsStatus('disconnected');
|
||||
};
|
||||
|
||||
/**
 * Builds the WebSocket URL for a debug session by attaching the current
 * assistant's id as the `assistant_id` query parameter.
 *
 * The configured `wsUrl` is parsed as an absolute URL first and, failing that,
 * resolved relative to the current page. An `http:`/`https:` result (typical
 * for a relative base resolved against the page) is mapped to `ws:`/`wss:` so
 * that WebSocket implementations which only accept ws(s) schemes do not throw.
 *
 * @returns '' when `wsUrl` is blank; the URL with `assistant_id` set when it
 *          can be parsed; otherwise the trimmed `wsUrl` unchanged (in which
 *          case no `assistant_id` is attached).
 */
const buildSessionWsUrl = (): string => {
  const base = wsUrl.trim();
  if (!base) return '';

  // Candidate parsers in priority order: absolute, then relative to the page.
  const parsers: Array<() => URL> = [
    () => new URL(base),
    () => new URL(base, window.location.href),
  ];

  for (const parse of parsers) {
    try {
      const parsed = parse();
      // Same mapping the WebSocket constructor applies in current browsers;
      // done explicitly so older implementations accept the URL as well.
      if (parsed.protocol === 'http:') {
        parsed.protocol = 'ws:';
      } else if (parsed.protocol === 'https:') {
        parsed.protocol = 'wss:';
      }
      parsed.searchParams.set('assistant_id', assistant.id);
      return parsed.toString();
    } catch {
      // Fall through to the next parsing strategy.
    }
  }

  // Unparseable even relative to the page: hand back the raw value untouched.
  return base;
};
|
||||
|
||||
const ensureWsSession = async () => {
|
||||
if (wsRef.current && wsReadyRef.current && wsRef.current.readyState === WebSocket.OPEN) {
|
||||
return;
|
||||
@@ -3083,18 +3079,25 @@ export const DebugDrawer: React.FC<{
|
||||
}
|
||||
|
||||
const metadata = await fetchRuntimeMetadata();
|
||||
const sessionWsUrl = buildSessionWsUrl();
|
||||
setWsStatus('connecting');
|
||||
setWsError('');
|
||||
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
pendingResolveRef.current = resolve;
|
||||
pendingRejectRef.current = reject;
|
||||
const ws = new WebSocket(wsUrl);
|
||||
const ws = new WebSocket(sessionWsUrl);
|
||||
ws.binaryType = 'arraybuffer';
|
||||
wsRef.current = ws;
|
||||
|
||||
ws.onopen = () => {
|
||||
ws.send(JSON.stringify({ type: 'hello', version: 'v1' }));
|
||||
ws.send(
|
||||
JSON.stringify({
|
||||
type: 'session.start',
|
||||
audio: { encoding: 'pcm_s16le', sample_rate_hz: 16000, channels: 1 },
|
||||
metadata,
|
||||
})
|
||||
);
|
||||
};
|
||||
|
||||
ws.onmessage = (event) => {
|
||||
@@ -3118,16 +3121,6 @@ export const DebugDrawer: React.FC<{
|
||||
if (onProtocolEvent) {
|
||||
onProtocolEvent(payload);
|
||||
}
|
||||
if (type === 'hello.ack') {
|
||||
ws.send(
|
||||
JSON.stringify({
|
||||
type: 'session.start',
|
||||
audio: { encoding: 'pcm_s16le', sample_rate_hz: 16000, channels: 1 },
|
||||
metadata,
|
||||
})
|
||||
);
|
||||
return;
|
||||
}
|
||||
if (type === 'output.audio.start') {
|
||||
// New utterance audio starts: cancel old queued/playing audio to avoid overlap.
|
||||
stopPlaybackImmediately();
|
||||
|
||||
Reference in New Issue
Block a user