API has LLM response event

Xin Wang
2026-02-04 12:00:52 +08:00
parent 5aa9a12ca8
commit 7d255468ab
4 changed files with 63 additions and 9 deletions


@@ -4,10 +4,12 @@ Microphone client for testing duplex voice conversation.
This client captures audio from the microphone, sends it to the server,
and plays back the AI's voice response through the speakers.
It also displays the LLM's text responses in the console.

Usage:
    python examples/mic_client.py --url ws://localhost:8000/ws
    python examples/mic_client.py --url ws://localhost:8000/ws --chat "Hello!"
    python examples/mic_client.py --url ws://localhost:8000/ws --verbose

Requirements:
    pip install sounddevice soundfile websockets numpy
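For reference, the llmResponse events handled by the new code below carry a text payload and an isFinal flag. A minimal sketch of what such frames might look like; only the "type", "text", and "isFinal" field names are confirmed by this diff, the sample text is made up:

    # Hypothetical llmResponse frames as the client expects them (illustrative values only)
    streaming_chunk = {"type": "llmResponse", "text": "Hello, how", "isFinal": False}
    final_response = {"type": "llmResponse", "text": "Hello, how can I help you today?", "isFinal": True}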
@@ -101,6 +103,9 @@ class MicrophoneClient:
    # Interrupt handling - discard audio until next trackStart
    self._discard_audio = False
    self._audio_sequence = 0  # Track audio sequence to detect stale chunks

    # Verbose mode for streaming LLM responses
    self.verbose = False

async def connect(self) -> None:
    """Connect to WebSocket server."""
@@ -314,6 +319,17 @@ class MicrophoneClient:
    # Server-side TTFB event
    latency_ms = event.get("latencyMs", 0)
    print(f"← [TTFB] Server reported latency: {latency_ms}ms")
elif event_type == "llmResponse":
    # LLM text response
    text = event.get("text", "")
    is_final = event.get("isFinal", False)
    if is_final:
        # Print final LLM response
        print(f"← AI: {text}")
    elif self.verbose:
        # Show streaming chunks only in verbose mode
        display_text = text[:60] + "..." if len(text) > 60 else text
        print(f"  [streaming] {display_text}")
elif event_type == "trackStart":
    print("← Bot started speaking")
    # IMPORTANT: Accept audio again after trackStart
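The final/streaming split can be exercised without a live server. A small standalone sketch that mirrors the branch logic from the hunk above; the function name and sample events are illustrative, not part of the client:

    # Mirrors the llmResponse branch for quick local checks; not the actual client code.
    def show_llm_response(event: dict, verbose: bool) -> None:
        text = event.get("text", "")
        if event.get("isFinal", False):
            print(f"← AI: {text}")
        elif verbose:
            display_text = text[:60] + "..." if len(text) > 60 else text
            print(f"  [streaming] {display_text}")

    show_llm_response({"type": "llmResponse", "text": "Hi", "isFinal": False}, verbose=True)
    show_llm_response({"type": "llmResponse", "text": "Hi there!", "isFinal": True}, verbose=True)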
@@ -552,6 +568,11 @@ async def main():
action="store_true",
help="Disable interactive mode"
)
parser.add_argument(
"--verbose", "-v",
action="store_true",
help="Show streaming LLM response chunks"
)
args = parser.parse_args()
@@ -565,6 +586,7 @@ async def main():
    input_device=args.input_device,
    output_device=args.output_device
)
client.verbose = args.verbose

await client.run(
    chat_message=args.chat,


@@ -115,7 +115,13 @@ class WavFileClient:
"direction": direction,
"message": message
})
print(f"{direction} {message}")
# Handle encoding errors on Windows
try:
print(f"{direction} {message}")
except UnicodeEncodeError:
# Replace problematic characters for console output
safe_message = message.encode('ascii', errors='replace').decode('ascii')
print(f"{direction} {safe_message}")
async def connect(self) -> None:
"""Connect to WebSocket server."""
@@ -285,6 +291,14 @@ class WavFileClient:
elif event_type == "ttfb":
latency_ms = event.get("latencyMs", 0)
self.log_event("", f"[TTFB] Server latency: {latency_ms}ms")
elif event_type == "llmResponse":
text = event.get("text", "")
is_final = event.get("isFinal", False)
if is_final:
self.log_event("", f"LLM Response (final): {text[:100]}{'...' if len(text) > 100 else ''}")
elif self.verbose:
# Show streaming chunks only in verbose mode
self.log_event("", f"LLM: {text}")
elif event_type == "trackStart":
self.track_started = True
self.log_event("", "Bot started speaking")