Text-mode debug drawer: interrupt in-flight TTS audio and cancel the active response when a new user message is sent; move the hang-up button next to the input

This commit is contained in:
Xin Wang
2026-02-09 11:25:25 +08:00
parent f4aa432f0e
commit 17bd4a78f4

View File

@@ -1065,6 +1065,7 @@ export const DebugDrawer: React.FC<{
const assistantDraftIndexRef = useRef<number | null>(null);
const audioCtxRef = useRef<AudioContext | null>(null);
const playbackTimeRef = useRef<number>(0);
const activeAudioSourcesRef = useRef<Set<AudioBufferSourceNode>>(new Set());
const [devices, setDevices] = useState<MediaDeviceInfo[]>([]);
const [selectedCamera, setSelectedCamera] = useState<string>('');
@@ -1151,6 +1152,19 @@ export const DebugDrawer: React.FC<{
playbackTimeRef.current = ctx.currentTime;
};
// Immediately silence all TTS playback: stop every queued/playing buffer
// source, detach it from the graph, and reset the playback queue/clock.
// Used when a response is interrupted, cancelled, or the session closes.
const stopPlaybackImmediately = () => {
  activeAudioSourcesRef.current.forEach((source) => {
    try {
      // stop() throws InvalidStateError if the source was never started.
      source.stop();
    } catch {
      // no-op
    }
    try {
      // Guard disconnect() too, matching the onended cleanup handler,
      // so one throwing source cannot abort cleanup of the rest.
      source.disconnect();
    } catch {
      // no-op
    }
  });
  activeAudioSourcesRef.current.clear();
  clearPlaybackQueue();
};
const playPcm16Chunk = async (pcmBuffer: ArrayBuffer) => {
if (!textTtsEnabled) return;
if (mode !== 'text') return;
@@ -1170,6 +1184,15 @@ export const DebugDrawer: React.FC<{
const source = ctx.createBufferSource();
source.buffer = audioBuffer;
source.connect(ctx.destination);
activeAudioSourcesRef.current.add(source);
source.onended = () => {
activeAudioSourcesRef.current.delete(source);
try {
source.disconnect();
} catch {
// no-op
}
};
const startAt = Math.max(ctx.currentTime + 0.02, playbackTimeRef.current);
source.start(startAt);
@@ -1228,6 +1251,11 @@ export const DebugDrawer: React.FC<{
if (mode === 'text') {
if (textTtsEnabled) await ensureAudioContext();
await ensureWsSession();
// Interrupt any in-flight response/audio before sending new user utterance.
if (wsRef.current?.readyState === WebSocket.OPEN) {
wsRef.current.send(JSON.stringify({ type: 'response.cancel', graceful: false }));
}
stopPlaybackImmediately();
wsRef.current?.send(JSON.stringify({ type: 'input.text', text: userMsg }));
} else {
setTimeout(() => {
@@ -1391,7 +1419,7 @@ export const DebugDrawer: React.FC<{
pendingRejectRef.current = null;
assistantDraftIndexRef.current = null;
setTextSessionStarted(false);
clearPlaybackQueue();
stopPlaybackImmediately();
if (isOpen) setWsStatus('disconnected');
};
@@ -1456,7 +1484,15 @@ export const DebugDrawer: React.FC<{
return;
}
if (type === 'output.audio.start') {
clearPlaybackQueue();
// New utterance audio starts: cancel old queued/playing audio to avoid overlap.
stopPlaybackImmediately();
return;
}
if (type === 'response.interrupted') {
assistantDraftIndexRef.current = null;
setIsLoading(false);
stopPlaybackImmediately();
return;
}
@@ -1541,7 +1577,7 @@ export const DebugDrawer: React.FC<{
ws.onclose = () => {
wsReadyRef.current = false;
setTextSessionStarted(false);
clearPlaybackQueue();
stopPlaybackImmediately();
if (wsStatus !== 'error') setWsStatus('disconnected');
};
});
@@ -1549,7 +1585,7 @@ export const DebugDrawer: React.FC<{
useEffect(() => {
if (!textTtsEnabled) {
clearPlaybackQueue();
stopPlaybackImmediately();
}
}, [textTtsEnabled]);
@@ -1660,13 +1696,10 @@ export const DebugDrawer: React.FC<{
<div className="flex-1 overflow-hidden flex flex-col min-h-0 mb-4">
{mode === 'text' ? (
textSessionStarted ? (
<div className="flex-1 flex flex-col min-h-0 space-y-2">
<div className="flex flex-col h-full animate-in fade-in">
<div className="flex-1 flex flex-col min-h-0 animate-in fade-in">
<div className="h-[52vh] min-h-[260px] max-h-[52vh]">
<TranscriptionLog />
</div>
<Button variant="destructive" size="sm" className="w-full h-10 font-bold" onClick={closeWs}>
<PhoneOff className="mr-2 h-4 w-4" />
</Button>
</div>
) : wsStatus === 'connecting' ? (
<div className="flex-1 flex flex-col items-center justify-center space-y-6">
@@ -1769,6 +1802,11 @@ export const DebugDrawer: React.FC<{
<div className="shrink-0 space-y-2">
<div className="flex space-x-2">
{mode === 'text' && textSessionStarted && (
<Button variant="destructive" size="sm" className="h-9 font-bold shrink-0" onClick={closeWs}>
<PhoneOff className="mr-2 h-4 w-4" />
</Button>
)}
<Input
value={inputText}
onChange={e => setInputText(e.target.value)}