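Enhance DuplexPipeline and the AssistantsPage/DebugDrawer UI for improved interruption (barge-in) handling. Introduce the _OPENER_PRE_ROLL_MS constant (180 ms) so the pipeline waits briefly before playing the opener, giving client mic capture a head start; start mic capture before the session is started in DebugDrawer so the opener can be interrupted from its first moment; and lower the default interruption sensitivity from 500 ms to 180 ms across the relevant components to optimize responsiveness during assistant interactions.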

This commit is contained in:
Xin Wang
2026-02-27 11:51:15 +08:00
parent 6a9b5fcff4
commit 71cbfa2b48
2 changed files with 16 additions and 11 deletions

View File

@@ -73,6 +73,7 @@ class DuplexPipeline:
_ASR_DELTA_THROTTLE_MS = 500 _ASR_DELTA_THROTTLE_MS = 500
_LLM_DELTA_THROTTLE_MS = 80 _LLM_DELTA_THROTTLE_MS = 80
_ASR_CAPTURE_MAX_MS = 15000 _ASR_CAPTURE_MAX_MS = 15000
_OPENER_PRE_ROLL_MS = 180
_DEFAULT_TOOL_SCHEMAS: Dict[str, Dict[str, Any]] = { _DEFAULT_TOOL_SCHEMAS: Dict[str, Dict[str, Any]] = {
"current_time": { "current_time": {
"name": "current_time", "name": "current_time",
@@ -781,6 +782,9 @@ class DuplexPipeline:
) )
await self.conversation.add_assistant_turn(greeting_to_speak) await self.conversation.add_assistant_turn(greeting_to_speak)
# Give client mic capture a short head start so opener can be interrupted immediately.
await asyncio.sleep(self._OPENER_PRE_ROLL_MS / 1000.0)
used_preloaded_audio = await self._play_preloaded_opener_audio() used_preloaded_audio = await self._play_preloaded_opener_audio()
if self._tts_output_enabled() and not used_preloaded_audio: if self._tts_output_enabled() and not used_preloaded_audio:
# Keep opener text ahead of opener voice start. # Keep opener text ahead of opener voice start.

View File

@@ -178,7 +178,7 @@ export const AssistantsPage: React.FC = () => {
hotwords: [], hotwords: [],
tools: [], tools: [],
botCannotBeInterrupted: false, botCannotBeInterrupted: false,
interruptionSensitivity: 500, interruptionSensitivity: 180,
configMode: 'platform', configMode: 'platform',
}; };
try { try {
@@ -1049,7 +1049,7 @@ export const AssistantsPage: React.FC = () => {
<div className="relative"> <div className="relative">
<Input <Input
type="number" type="number"
value={selectedAssistant.interruptionSensitivity || 500} value={selectedAssistant.interruptionSensitivity || 180}
onChange={(e) => updateAssistant('interruptionSensitivity', parseInt(e.target.value) || 0)} onChange={(e) => updateAssistant('interruptionSensitivity', parseInt(e.target.value) || 0)}
className="w-20 h-8 text-right pr-7 text-xs font-mono bg-black/40 border-white/5" className="w-20 h-8 text-right pr-7 text-xs font-mono bg-black/40 border-white/5"
/> />
@@ -1063,7 +1063,7 @@ export const AssistantsPage: React.FC = () => {
min="0" min="0"
max="2000" max="2000"
step="50" step="50"
value={selectedAssistant.interruptionSensitivity || 500} value={selectedAssistant.interruptionSensitivity || 180}
onChange={(e) => updateAssistant('interruptionSensitivity', parseInt(e.target.value))} onChange={(e) => updateAssistant('interruptionSensitivity', parseInt(e.target.value))}
className="flex-1 h-1.5 bg-secondary rounded-lg appearance-none cursor-pointer accent-primary" className="flex-1 h-1.5 bg-secondary rounded-lg appearance-none cursor-pointer accent-primary"
/> />
@@ -2076,8 +2076,9 @@ export const DebugDrawer: React.FC<{
setDynamicVariablesError(''); setDynamicVariablesError('');
closeWs(); closeWs();
if (textTtsEnabled) await ensureAudioContext(); if (textTtsEnabled) await ensureAudioContext();
await ensureWsSession(); // Start mic capture before session.start so barge-in works from opener start.
await startVoiceCapture(); await startVoiceCapture();
await ensureWsSession();
setCallStatus('active'); setCallStatus('active');
} catch (e) { } catch (e) {
console.error(e); console.error(e);
@@ -2378,7 +2379,7 @@ export const DebugDrawer: React.FC<{
generatedOpenerEnabled: assistant.generatedOpenerEnabled === true, generatedOpenerEnabled: assistant.generatedOpenerEnabled === true,
bargeIn: { bargeIn: {
enabled: assistant.botCannotBeInterrupted !== true, enabled: assistant.botCannotBeInterrupted !== true,
minDurationMs: assistant.interruptionSensitivity || 500, minDurationMs: Math.max(0, Number(assistant.interruptionSensitivity ?? 180)),
}, },
knowledgeBaseId, knowledgeBaseId,
knowledge, knowledge,