Voice debug drawer: add microphone device selection and de-duplicate ASR transcripts

2026-02-09 16:47:58 +08:00
parent 38e20052f7
commit ab90b7c7df
1 changed files with 41 additions and 2 deletions
--- a/web/pages/Assistants.tsx
+++ b/web/pages/Assistants.tsx
@@ -1076,6 +1076,7 @@ export const DebugDrawer: React.FC<{
  const micProcessorRef = useRef<ScriptProcessorNode | null>(null);
  const micGainRef = useRef<GainNode | null>(null);
  const userDraftIndexRef = useRef<number | null>(null);
+  const lastUserFinalRef = useRef<string>('');

  // Initialize
  useEffect(() => {
@@ -1382,6 +1383,7 @@ export const DebugDrawer: React.FC<{
      try {
        setCallStatus('calling');
        setMessages([]);
+        lastUserFinalRef.current = '';
        setWsError('');
        closeWs();
        if (textTtsEnabled) await ensureAudioContext();
@@ -1406,6 +1408,7 @@ export const DebugDrawer: React.FC<{
    closeWs();
    setCallStatus('idle');
    setMessages([]);
+    lastUserFinalRef.current = '';
    setIsLoading(false);
  };

@@ -1450,6 +1453,7 @@ export const DebugDrawer: React.FC<{
      setWsError('');
      // Start every text debug run as a fresh session transcript.
      setMessages([]);
+      lastUserFinalRef.current = '';
      assistantDraftIndexRef.current = null;
      // Force a fresh WS session so updated assistant runtime config
      // (voice/model/provider/speed) is applied on session.start.
@@ -1566,6 +1570,7 @@ export const DebugDrawer: React.FC<{
    pendingRejectRef.current = null;
    assistantDraftIndexRef.current = null;
    userDraftIndexRef.current = null;
+    lastUserFinalRef.current = '';
    setTextSessionStarted(false);
    stopPlaybackImmediately();
    if (isOpen) setWsStatus('disconnected');
@@ -1670,7 +1675,8 @@ export const DebugDrawer: React.FC<{
              return next;
            }
            const next = [...prev];
-            next[idx] = { ...next[idx], text: next[idx].text + delta };
+            // ASR interim is typically the latest partial text, not a true text delta.
+            next[idx] = { ...next[idx], text: delta };
            return next;
          });
          return;
@@ -1678,15 +1684,37 @@ export const DebugDrawer: React.FC<{

        if (type === 'transcript.final') {
          const finalText = String(payload.text || '');
+          if (!finalText) {
+            userDraftIndexRef.current = null;
+            return;
+          }
+          if (lastUserFinalRef.current === finalText) {
+            userDraftIndexRef.current = null;
+            return;
+          }
          setMessages((prev) => {
            const idx = userDraftIndexRef.current;
            userDraftIndexRef.current = null;
            if (idx !== null && prev[idx] && prev[idx].role === 'user') {
              const next = [...prev];
              next[idx] = { ...next[idx], text: finalText || next[idx].text };
+              lastUserFinalRef.current = finalText;
              return next;
            }
-            if (!finalText) return prev;
+            const last = prev[prev.length - 1];
+            if (last?.role === 'user') {
+              if (last.text === finalText) {
+                lastUserFinalRef.current = finalText;
+                return prev;
+              }
+              if (finalText.startsWith(last.text) || last.text.startsWith(finalText)) {
+                const next = [...prev];
+                next[next.length - 1] = { ...last, text: finalText };
+                lastUserFinalRef.current = finalText;
+                return next;
+              }
+            }
+            lastUserFinalRef.current = finalText;
            return [...prev, { role: 'user', text: finalText }];
          });
          return;
@@ -1986,6 +2014,17 @@ export const DebugDrawer: React.FC<{
                <div className="flex-1 flex flex-col min-h-0 space-y-2">
                  {mode === 'voice' ? (
                    <div className="flex flex-col h-full min-h-0 animate-in fade-in">
+                      <div className="mb-2">
+                        <select
+                          className="w-full text-xs bg-white/5 border border-white/10 rounded px-2 py-1 text-foreground"
+                          value={selectedMic}
+                          onChange={(e) => setSelectedMic(e.target.value)}
+                        >
+                          {devices.filter(d => d.kind === 'audioinput').map(d => (
+                            <option key={d.deviceId} value={d.deviceId}>{d.label || 'Mic'}</option>
+                          ))}
+                        </select>
+                      </div>
                      <div className="h-1/3 min-h-[150px] shrink-0 border border-white/5 rounded-md bg-black/20 flex flex-col items-center justify-center text-muted-foreground space-y-4 mb-2 relative overflow-hidden">
                        <div className="h-24 w-24 rounded-full bg-primary/10 flex items-center justify-center animate-pulse relative z-10">
                          <Mic className="h-10 w-10 text-primary" />