voice barge-in is ok

2026-01-29 17:47:15 +08:00
parent d6d0ade33e
commit aa4316de6f
3 changed files with 112 additions and 41 deletions
--- a/examples/mic_client.py
+++ b/examples/mic_client.py
@@ -151,53 +151,57 @@ class MicrophoneClient:
        with self.audio_output_lock:
            self.audio_output_buffer += audio_data
    
-    async def _playback_task(self):
-        """Background task to play buffered audio smoothly using output stream."""
-        # Use a continuous output stream for smooth playback
-        chunk_samples = int(self.sample_rate * 0.05)  # 50ms chunks
-        chunk_bytes = chunk_samples * 2  # 16-bit = 2 bytes per sample
+    def _playback_thread_func(self):
+        """Thread function for continuous audio playback."""
+        import time
        
-        def output_callback(outdata, frames, time_info, status):
-            """Audio output callback."""
-            if status:
-                print(f"Output status: {status}")
-            
-            bytes_needed = frames * 2
-            with self.audio_output_lock:
-                if len(self.audio_output_buffer) >= bytes_needed:
-                    audio_data = self.audio_output_buffer[:bytes_needed]
-                    self.audio_output_buffer = self.audio_output_buffer[bytes_needed:]
-                    samples = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32767.0
-                    outdata[:, 0] = samples
-                else:
-                    outdata.fill(0)
+        # Chunk size: 50ms of audio
+        chunk_samples = int(self.sample_rate * 0.05)
+        chunk_bytes = chunk_samples * 2
+        
+        print(f"Audio playback thread started (device: {self.output_device or 'default'})")
        
-        # Create and start output stream
        try:
-            output_stream = sd.OutputStream(
+            # Create a blocking output stream (no callback); audio is pushed via stream.write()
+            with sd.OutputStream(
                samplerate=self.sample_rate,
                channels=1,
-                dtype=np.float32,
+                dtype='int16',
                blocksize=chunk_samples,
                device=self.output_device,
-                callback=output_callback,
                latency='low'
-            )
-            output_stream.start()
-            print(f"Audio output stream started (device: {self.output_device or 'default'})")
-            
-            # Keep stream running while client is active
-            while self.running:
-                await asyncio.sleep(0.1)
-            
-            output_stream.stop()
-            output_stream.close()
-            
+            ) as stream:
+                while self.running:
+                    # Get audio from buffer
+                    with self.audio_output_lock:
+                        if len(self.audio_output_buffer) >= chunk_bytes:
+                            audio_data = self.audio_output_buffer[:chunk_bytes]
+                            self.audio_output_buffer = self.audio_output_buffer[chunk_bytes:]
+                        else:
+                            # Less than one full chunk buffered - output silence, keep the partial data
+                            audio_data = b'\x00' * chunk_bytes
+                    
+                    # Convert to numpy array and write to stream
+                    samples = np.frombuffer(audio_data, dtype=np.int16).reshape(-1, 1)
+                    stream.write(samples)
+                    
        except Exception as e:
-            print(f"Playback error: {e}")
+            print(f"Playback thread error: {e}")
            import traceback
            traceback.print_exc()
    
+    async def _playback_task(self):
+        """Start playback thread and monitor it."""
+        # Run playback in a dedicated thread for reliable timing
+        playback_thread = threading.Thread(target=self._playback_thread_func, daemon=True)
+        playback_thread.start()
+        
+        # Wait until the client stops or the playback thread exits
+        while self.running and playback_thread.is_alive():
+            await asyncio.sleep(0.1)
+        
+        print("Audio playback stopped")
+    
    async def audio_sender(self) -> None:
        """Send audio from microphone to server."""
        while self.running:
@@ -274,10 +278,13 @@ class MicrophoneClient:
            text = event.get("text", "")
            is_final = event.get("isFinal", False)
            if is_final:
-                print(f"← You said: {text}")
+                # Clear the interim line and print final
+                print(" " * 80, end="\r")  # Clear previous interim text
+                print(f"→ You: {text}")
            else:
-                # Interim result - show with indicator
-                print(f"← [listening] {text}", end="\r")
+                # Interim result - show with indicator (overwrite same line)
+                display_text = text[:60] + "..." if len(text) > 60 else text
+                print(f"  [listening] {display_text}".ljust(80), end="\r")
        elif event_type == "trackStart":
            print("← Bot started speaking")
            # Clear any old audio in buffer
@@ -287,6 +294,11 @@ class MicrophoneClient:
            print("← Bot finished speaking")
        elif event_type == "interrupt":
            print("← Bot interrupted!")
+            # IMPORTANT: Clear audio buffer immediately on interrupt
+            with self.audio_output_lock:
+                buffer_ms = len(self.audio_output_buffer) / (self.sample_rate * 2) * 1000
+                self.audio_output_buffer = b""
+                print(f"   (cleared {buffer_ms:.0f}ms of buffered audio)")
        elif event_type == "error":
            print(f"← Error: {event.get('error')}")
        elif event_type == "hangup":