voice barge-in is ok
This commit is contained in:
@@ -151,53 +151,57 @@ class MicrophoneClient:
|
||||
with self.audio_output_lock:
|
||||
self.audio_output_buffer += audio_data
|
||||
|
||||
async def _playback_task(self):
|
||||
"""Background task to play buffered audio smoothly using output stream."""
|
||||
# Use a continuous output stream for smooth playback
|
||||
chunk_samples = int(self.sample_rate * 0.05) # 50ms chunks
|
||||
chunk_bytes = chunk_samples * 2 # 16-bit = 2 bytes per sample
|
||||
def _playback_thread_func(self):
|
||||
"""Thread function for continuous audio playback."""
|
||||
import time
|
||||
|
||||
def output_callback(outdata, frames, time_info, status):
|
||||
"""Audio output callback."""
|
||||
if status:
|
||||
print(f"Output status: {status}")
|
||||
|
||||
bytes_needed = frames * 2
|
||||
with self.audio_output_lock:
|
||||
if len(self.audio_output_buffer) >= bytes_needed:
|
||||
audio_data = self.audio_output_buffer[:bytes_needed]
|
||||
self.audio_output_buffer = self.audio_output_buffer[bytes_needed:]
|
||||
samples = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32767.0
|
||||
outdata[:, 0] = samples
|
||||
else:
|
||||
outdata.fill(0)
|
||||
# Chunk size: 50ms of audio
|
||||
chunk_samples = int(self.sample_rate * 0.05)
|
||||
chunk_bytes = chunk_samples * 2
|
||||
|
||||
print(f"Audio playback thread started (device: {self.output_device or 'default'})")
|
||||
|
||||
# Create and start output stream
|
||||
try:
|
||||
output_stream = sd.OutputStream(
|
||||
# Create output stream with callback
|
||||
with sd.OutputStream(
|
||||
samplerate=self.sample_rate,
|
||||
channels=1,
|
||||
dtype=np.float32,
|
||||
dtype='int16',
|
||||
blocksize=chunk_samples,
|
||||
device=self.output_device,
|
||||
callback=output_callback,
|
||||
latency='low'
|
||||
)
|
||||
output_stream.start()
|
||||
print(f"Audio output stream started (device: {self.output_device or 'default'})")
|
||||
|
||||
# Keep stream running while client is active
|
||||
while self.running:
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
output_stream.stop()
|
||||
output_stream.close()
|
||||
|
||||
) as stream:
|
||||
while self.running:
|
||||
# Get audio from buffer
|
||||
with self.audio_output_lock:
|
||||
if len(self.audio_output_buffer) >= chunk_bytes:
|
||||
audio_data = self.audio_output_buffer[:chunk_bytes]
|
||||
self.audio_output_buffer = self.audio_output_buffer[chunk_bytes:]
|
||||
else:
|
||||
# Not enough audio - output silence
|
||||
audio_data = b'\x00' * chunk_bytes
|
||||
|
||||
# Convert to numpy array and write to stream
|
||||
samples = np.frombuffer(audio_data, dtype=np.int16).reshape(-1, 1)
|
||||
stream.write(samples)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Playback error: {e}")
|
||||
print(f"Playback thread error: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
async def _playback_task(self):
    """Start the playback worker thread and wait until it has shut down.

    Launches ``self._playback_thread_func`` in a daemon thread, then polls
    every 100 ms until either ``self.running`` becomes false or the worker
    exits on its own. Before reporting that playback has stopped, the
    worker is given a bounded amount of time to finish, so that whatever
    cleanup it performs on exit has a chance to complete.
    """
    # Run playback in a dedicated thread for reliable timing
    playback_thread = threading.Thread(target=self._playback_thread_func, daemon=True)
    playback_thread.start()

    # Wait for client to stop
    while self.running and playback_thread.is_alive():
        await asyncio.sleep(0.1)

    # The worker only notices `self.running` flipping between iterations of
    # its own loop, so it may still be alive here. Join it with a timeout in
    # the default executor: a direct join() would block the event loop, and
    # an unbounded one could hang shutdown if the worker is stuck.
    if playback_thread.is_alive():
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, playback_thread.join, 1.0)

    print("Audio playback stopped")
|
||||
|
||||
async def audio_sender(self) -> None:
|
||||
"""Send audio from microphone to server."""
|
||||
while self.running:
|
||||
@@ -274,10 +278,13 @@ class MicrophoneClient:
|
||||
text = event.get("text", "")
|
||||
is_final = event.get("isFinal", False)
|
||||
if is_final:
|
||||
print(f"← You said: {text}")
|
||||
# Clear the interim line and print final
|
||||
print(" " * 80, end="\r") # Clear previous interim text
|
||||
print(f"→ You: {text}")
|
||||
else:
|
||||
# Interim result - show with indicator
|
||||
print(f"← [listening] {text}", end="\r")
|
||||
# Interim result - show with indicator (overwrite same line)
|
||||
display_text = text[:60] + "..." if len(text) > 60 else text
|
||||
print(f" [listening] {display_text}".ljust(80), end="\r")
|
||||
elif event_type == "trackStart":
|
||||
print("← Bot started speaking")
|
||||
# Clear any old audio in buffer
|
||||
@@ -287,6 +294,11 @@ class MicrophoneClient:
|
||||
print("← Bot finished speaking")
|
||||
elif event_type == "interrupt":
|
||||
print("← Bot interrupted!")
|
||||
# IMPORTANT: Clear audio buffer immediately on interrupt
|
||||
with self.audio_output_lock:
|
||||
buffer_ms = len(self.audio_output_buffer) / (self.sample_rate * 2) * 1000
|
||||
self.audio_output_buffer = b""
|
||||
print(f" (cleared {buffer_ms:.0f}ms of buffered audio)")
|
||||
elif event_type == "error":
|
||||
print(f"← Error: {event.get('error')}")
|
||||
elif event_type == "hangup":
|
||||
|
||||
Reference in New Issue
Block a user