Add audio context in debug drawer
This commit is contained in:
@@ -1061,11 +1061,14 @@ export const DebugDrawer: React.FC<{
|
||||
// Settle handlers for the request currently awaiting a WS response;
// null when no request is in flight. (NOTE(review): settled/cleared by the
// WS message handlers further down — confirm against the full component.)
const pendingResolveRef = useRef<(() => void) | null>(null);
const pendingRejectRef = useRef<((e: Error) => void) | null>(null);
// Index of the assistant message currently being streamed in; null when
// no draft is open. — TODO confirm which list it indexes into.
const assistantDraftIndexRef = useRef<number | null>(null);
// Lazily-created shared AudioContext for TTS playback; closed and nulled
// on teardown (see the mode-reset handler below).
const audioCtxRef = useRef<AudioContext | null>(null);
// Absolute AudioContext time at which the next queued PCM chunk should
// start, so chunks play back-to-back without gaps.
const playbackTimeRef = useRef<number>(0);

// Available media devices and the user's current picks.
const [devices, setDevices] = useState<MediaDeviceInfo[]>([]);
const [selectedCamera, setSelectedCamera] = useState<string>('');
const [selectedMic, setSelectedMic] = useState<string>('');
// Whether the two video panes are swapped.
const [isSwapped, setIsSwapped] = useState(false);
// Whether text-mode assistant replies should also be spoken (TTS checkbox).
const [textTtsEnabled, setTextTtsEnabled] = useState(true);

// Initialize
useEffect(() => {
|
||||
@@ -1080,6 +1083,10 @@ export const DebugDrawer: React.FC<{
|
||||
setMode('text');
|
||||
stopMedia();
|
||||
closeWs();
|
||||
if (audioCtxRef.current) {
|
||||
void audioCtxRef.current.close();
|
||||
audioCtxRef.current = null;
|
||||
}
|
||||
setIsSwapped(false);
|
||||
setCallStatus('idle');
|
||||
}
|
||||
@@ -1123,6 +1130,48 @@ export const DebugDrawer: React.FC<{
|
||||
}
|
||||
};
|
||||
|
||||
const ensureAudioContext = async () => {
|
||||
if (!audioCtxRef.current) {
|
||||
audioCtxRef.current = new AudioContext();
|
||||
playbackTimeRef.current = audioCtxRef.current.currentTime;
|
||||
}
|
||||
if (audioCtxRef.current.state === 'suspended') {
|
||||
await audioCtxRef.current.resume();
|
||||
}
|
||||
return audioCtxRef.current;
|
||||
};
|
||||
|
||||
const clearPlaybackQueue = () => {
|
||||
const ctx = audioCtxRef.current;
|
||||
if (!ctx) return;
|
||||
playbackTimeRef.current = ctx.currentTime;
|
||||
};
|
||||
|
||||
const playPcm16Chunk = async (pcmBuffer: ArrayBuffer) => {
|
||||
if (!textTtsEnabled) return;
|
||||
if (mode !== 'text') return;
|
||||
const ctx = await ensureAudioContext();
|
||||
const int16 = new Int16Array(pcmBuffer);
|
||||
if (int16.length === 0) return;
|
||||
|
||||
const float32 = new Float32Array(int16.length);
|
||||
for (let i = 0; i < int16.length; i += 1) {
|
||||
float32[i] = int16[i] / 32768;
|
||||
}
|
||||
|
||||
const sampleRate = 16000;
|
||||
const audioBuffer = ctx.createBuffer(1, float32.length, sampleRate);
|
||||
audioBuffer.copyToChannel(float32, 0);
|
||||
|
||||
const source = ctx.createBufferSource();
|
||||
source.buffer = audioBuffer;
|
||||
source.connect(ctx.destination);
|
||||
|
||||
const startAt = Math.max(ctx.currentTime + 0.02, playbackTimeRef.current);
|
||||
source.start(startAt);
|
||||
playbackTimeRef.current = startAt + audioBuffer.duration;
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
const handleStream = async () => {
|
||||
if (isOpen && mode === 'video' && callStatus === 'active') {
|
||||
@@ -1173,6 +1222,7 @@ export const DebugDrawer: React.FC<{
|
||||
|
||||
try {
|
||||
if (mode === 'text') {
|
||||
if (textTtsEnabled) await ensureAudioContext();
|
||||
await ensureWsSession();
|
||||
wsRef.current?.send(JSON.stringify({ type: 'input.text', text: userMsg }));
|
||||
} else {
|
||||
@@ -1321,6 +1371,7 @@ export const DebugDrawer: React.FC<{
|
||||
pendingResolveRef.current = null;
|
||||
pendingRejectRef.current = null;
|
||||
assistantDraftIndexRef.current = null;
|
||||
clearPlaybackQueue();
|
||||
if (isOpen) setWsStatus('disconnected');
|
||||
};
|
||||
|
||||
@@ -1357,6 +1408,14 @@ export const DebugDrawer: React.FC<{
|
||||
};
|
||||
|
||||
ws.onmessage = (event) => {
|
||||
if (event.data instanceof ArrayBuffer) {
|
||||
void playPcm16Chunk(event.data);
|
||||
return;
|
||||
}
|
||||
if (event.data instanceof Blob) {
|
||||
void event.data.arrayBuffer().then((buf) => playPcm16Chunk(buf));
|
||||
return;
|
||||
}
|
||||
if (typeof event.data !== 'string') return;
|
||||
let payload: any;
|
||||
try {
|
||||
@@ -1376,6 +1435,10 @@ export const DebugDrawer: React.FC<{
|
||||
);
|
||||
return;
|
||||
}
|
||||
if (type === 'output.audio.start') {
|
||||
clearPlaybackQueue();
|
||||
return;
|
||||
}
|
||||
|
||||
if (type === 'session.started') {
|
||||
wsReadyRef.current = true;
|
||||
@@ -1457,11 +1520,18 @@ export const DebugDrawer: React.FC<{
|
||||
|
||||
ws.onclose = () => {
|
||||
wsReadyRef.current = false;
|
||||
clearPlaybackQueue();
|
||||
if (wsStatus !== 'error') setWsStatus('disconnected');
|
||||
};
|
||||
});
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
if (!textTtsEnabled) {
|
||||
clearPlaybackQueue();
|
||||
}
|
||||
}, [textTtsEnabled]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!isOpen) return;
|
||||
const localResolved = buildLocalResolvedRuntime();
|
||||
@@ -1525,6 +1595,15 @@ export const DebugDrawer: React.FC<{
|
||||
<Badge variant="outline" className="text-xs">
|
||||
WS: {wsStatus}
|
||||
</Badge>
|
||||
<label className="inline-flex items-center gap-1 text-xs text-muted-foreground px-2 py-1 rounded border border-white/10">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={textTtsEnabled}
|
||||
onChange={(e) => setTextTtsEnabled(e.target.checked)}
|
||||
className="accent-primary"
|
||||
/>
|
||||
TTS
|
||||
</label>
|
||||
<Button size="sm" variant="secondary" onClick={() => ensureWsSession()} disabled={wsStatus === 'connecting'}>
|
||||
Connect
|
||||
</Button>
|
||||
|
||||
Reference in New Issue
Block a user