API has LLM response event

Xin Wang
2026-02-04 12:00:52 +08:00
parent 5aa9a12ca8
commit 7d255468ab
4 changed files with 63 additions and 9 deletions


@@ -4,10 +4,12 @@ Microphone client for testing duplex voice conversation.
This client captures audio from the microphone, sends it to the server,
and plays back the AI's voice response through the speakers.
It also displays the LLM's text responses in the console.

Usage:
    python examples/mic_client.py --url ws://localhost:8000/ws
    python examples/mic_client.py --url ws://localhost:8000/ws --chat "Hello!"
    python examples/mic_client.py --url ws://localhost:8000/ws --verbose

Requirements:
    pip install sounddevice soundfile websockets numpy
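For reference, the llmResponse events handled by the new code below carry a text payload and an isFinal flag. A minimal sketch of what such frames might look like; only the "type", "text", and "isFinal" field names are confirmed by this diff, the sample text is made up:

    # Hypothetical llmResponse frames as the client expects them (illustrative values only)
    streaming_chunk = {"type": "llmResponse", "text": "Hello, how", "isFinal": False}
    final_response = {"type": "llmResponse", "text": "Hello, how can I help you today?", "isFinal": True}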
@@ -101,6 +103,9 @@ class MicrophoneClient:
    # Interrupt handling - discard audio until next trackStart
    self._discard_audio = False
    self._audio_sequence = 0  # Track audio sequence to detect stale chunks

    # Verbose mode for streaming LLM responses
    self.verbose = False

async def connect(self) -> None:
    """Connect to WebSocket server."""
@@ -314,6 +319,17 @@ class MicrophoneClient:
    # Server-side TTFB event
    latency_ms = event.get("latencyMs", 0)
    print(f"← [TTFB] Server reported latency: {latency_ms}ms")
elif event_type == "llmResponse":
    # LLM text response
    text = event.get("text", "")
    is_final = event.get("isFinal", False)
    if is_final:
        # Print final LLM response
        print(f"← AI: {text}")
    elif self.verbose:
        # Show streaming chunks only in verbose mode
        display_text = text[:60] + "..." if len(text) > 60 else text
        print(f"  [streaming] {display_text}")
elif event_type == "trackStart":
    print("← Bot started speaking")
    # IMPORTANT: Accept audio again after trackStart
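The final/streaming split can be exercised without a live server. A small standalone sketch that mirrors the branch logic from the hunk above; the function name and sample events are illustrative, not part of the client:

    # Mirrors the llmResponse branch for quick local checks; not the actual client code.
    def show_llm_response(event: dict, verbose: bool) -> None:
        text = event.get("text", "")
        if event.get("isFinal", False):
            print(f"← AI: {text}")
        elif verbose:
            display_text = text[:60] + "..." if len(text) > 60 else text
            print(f"  [streaming] {display_text}")

    show_llm_response({"type": "llmResponse", "text": "Hi", "isFinal": False}, verbose=True)
    show_llm_response({"type": "llmResponse", "text": "Hi there!", "isFinal": True}, verbose=True)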
@@ -552,6 +568,11 @@ async def main():
action="store_true",
help="Disable interactive mode"
)
parser.add_argument(
"--verbose", "-v",
action="store_true",
help="Show streaming LLM response chunks"
)
args = parser.parse_args()
@@ -565,6 +586,7 @@ async def main():
    input_device=args.input_device,
    output_device=args.output_device
)
client.verbose = args.verbose

await client.run(
    chat_message=args.chat,


@@ -115,7 +115,13 @@ class WavFileClient:
"direction": direction,
"message": message
})
print(f"{direction} {message}")
# Handle encoding errors on Windows
try:
print(f"{direction} {message}")
except UnicodeEncodeError:
# Replace problematic characters for console output
safe_message = message.encode('ascii', errors='replace').decode('ascii')
print(f"{direction} {safe_message}")
async def connect(self) -> None:
"""Connect to WebSocket server."""
@@ -285,6 +291,14 @@ class WavFileClient:
elif event_type == "ttfb":
latency_ms = event.get("latencyMs", 0)
self.log_event("", f"[TTFB] Server latency: {latency_ms}ms")
elif event_type == "llmResponse":
text = event.get("text", "")
is_final = event.get("isFinal", False)
if is_final:
self.log_event("", f"LLM Response (final): {text[:100]}{'...' if len(text) > 100 else ''}")
elif self.verbose:
# Show streaming chunks only in verbose mode
self.log_event("", f"LLM: {text}")
elif event_type == "trackStart":
self.track_started = True
self.log_event("", "Bot started speaking")